# HG changeset patch # User David Bateman # Date 1209328457 -7200 # Node ID 82be108cc5583e5ddef84f8948825bb96f43d2af # Parent 45f5faba05a2311b3fb4478a20dabbe633341aa3 First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code diff -r 45f5faba05a2 -r 82be108cc558 ChangeLog --- a/ChangeLog Wed May 14 18:09:56 2008 +0200 +++ b/ChangeLog Sun Apr 27 22:34:17 2008 +0200 @@ -1,3 +1,8 @@ +2008-05-20 David Bateman + + * configure.in (AC_CHECK_FUNCS): Add expm1, lgammaf, lgammaf_r, + log1pf and tgammaf. Also check for libfftw3f. + 2008-04-09 Rafael Laboissiere * example/octave.desktop.in: Drop the Encoding key, which is diff -r 45f5faba05a2 -r 82be108cc558 PROJECTS --- a/PROJECTS Wed May 14 18:09:56 2008 +0200 +++ b/PROJECTS Sun Apr 27 22:34:17 2008 +0200 @@ -117,7 +117,6 @@ - minres - qmr - symmlq - - spaugment ------- Strings: @@ -143,6 +142,9 @@ * Template functions for mixed-type ops. + * Convert other functions for use with the floating point type + including quad, dasrt, daspk, etc. + ------------ Input/Output: ------------ diff -r 45f5faba05a2 -r 82be108cc558 configure.in --- a/configure.in Wed May 14 18:09:56 2008 +0200 +++ b/configure.in Sun Apr 27 22:34:17 2008 +0200 @@ -562,7 +562,8 @@ with_fftw3=no AC_CHECK_HEADER(fftw3.h, [have_fftw3_header=yes]) if test "$have_fftw3_header" = yes; then - AC_CHECK_LIB(fftw3, fftw_plan_dft_1d, [FFTW_LIBS="-lfftw3"; with_fftw3=yes]) + AC_CHECK_LIB(fftw3, fftw_plan_dft_1d, [ + AC_CHECK_LIB(fftw3f, fftwf_plan_dft_1d, [FFTW_LIBS="-lfftw3 -lfftw3f"; with_fftw3=yes])]) fi fi @@ -1319,15 +1320,15 @@ ### Checks for functions and variables. 
AC_CHECK_FUNCS(atexit basename bcopy bzero canonicalize_file_name \ - chmod dup2 endgrent endpwent execvp expm1 fcntl fork getcwd \ + chmod dup2 endgrent endpwent execvp expm1 expm1f fcntl fork getcwd \ getegid geteuid getgid getgrent getgrgid getgrnam getpgrp getpid \ getppid getpwent getpwuid gettimeofday getuid getwd _kbhit kill \ - lgamma lgamma_r link localtime_r log1p lstat memmove mkdir mkfifo \ - mkstemp on_exit pipe poll putenv raise readlink realpath rename \ - resolvepath rindex rmdir round select setgrent setlocale setpwent \ - setvbuf sigaction siglongjmp sigpending sigprocmask sigsuspend \ + lgamma lgammaf lgamma_r lgammaf_r link localtime_r log1p log1pf lstat \ + memmove mkdir mkfifo mkstemp on_exit pipe poll putenv raise readlink \ + realpath rename resolvepath rindex rmdir round select setgrent setlocale \ + setpwent setvbuf sigaction siglongjmp sigpending sigprocmask sigsuspend \ snprintf stat strcasecmp strdup strerror stricmp strncasecmp \ - strnicmp strptime strsignal symlink tempnam tgamma trunc umask \ + strnicmp strptime strsignal symlink tempnam tgamma tgammaf trunc umask \ uname unlink usleep utime vfprintf vsprintf vsnprintf waitpid \ _chmod _snprintf x_utime _utime32) @@ -1539,7 +1540,7 @@ ### Check for nonstandard but common math functions that we need. -AC_CHECK_FUNCS(acosh asinh atanh erf erfc exp2 log2) +AC_CHECK_FUNCS(acosh acoshf asinh asinhf atanh atanhf erf erff erfc erfcf exp2 exp2f log2 log2f) ### Checks for OS specific cruft. 
diff -r 45f5faba05a2 -r 82be108cc558 libcruft/ChangeLog --- a/libcruft/ChangeLog Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/ChangeLog Sun Apr 27 22:34:17 2008 +0200 @@ -1,3 +1,191 @@ +2008-05-20 Jaroslav Hajek + + * qrupdate/cch1dn.f, qrupdate/cchinx.f, qrupdate/cqhqr.f, + qrupdate/cqrinc.f, qrupdate/cqrinr.f, qrupdate/cqrqhu.f, + qrupdate/cqrqhv.f, qrupdate/sch1dn.f, qrupdate/schinx.f, + qrupdate/sqhqr.f, qrupdate/sqrinc.f, qrupdate/sqrinr.f, + qrupdate/sqrqhu.f: Convert DOUBLE PRECISION constants to REAL. + * qrupdate/cqrinr.f, qrupdate/sqrinr.f: Correct EXTERNAL + declarations. + * qrupdate/sqrinr.f: Convert DOUBLE PRECISION calls to + REAL counterparts. + +2008-05-20 David Bateman + + * Makefile.in (MISC_OBJ): Add misc/smachar.o + * Makerules.in (CRUFT_CSRC, CRUFT_CPICOBJ): Add CEXTRA, allowing + objects files with no corresponding source file in the + distribution. + + * amos/cacai.f, amos/cacon.f, amos/cbesh.f, amos/cbesi.f, + amos/cbesj.f, amos/cbesk.f, amos/cbesy.f, amos/cbinu.f, + amos/cbuni.f, amos/cbunk.f, amos/cunk1.f amos/cunk2.f, + amos/crati.f, amos/cshch.f, amos/cuni1.f, amos/cuoik.f, + amos/cairy.f, amos/cbiry.f, amos/ckscl.f, amos/cs1s2.f, + amos/cuchk.f, amos/cuni2.f, amos/cwrsk.f, amos/casyi.f, + amos/cbknu.f, amos/cmlri.f, amos/cseri.f, amos/cunhj.f, + amos/cunik.f: New files. + * amos/Makefile.in (FSRC): Add them. + + * blas-xtra/xsdot.f, blas-xtra/xsnrm2.f, blas-xtra/xscnrm2.f, + blas-xtra/xcdotc.f, blas-xtra/xcdotu.f: New files + * blas-xtra/Makefile.in (FSRC): Add them. 
+ + * blas/sasum.f, blas/saxpy.f, blas/scabs1.f, blas/scopy.f, + blas/sger.f, blas/smach.f, blas/snrm2.f, blas/srot.f, + blas/sswap.f, blas/ssymv.f, blas/ssyr.f, blas/ssyr2.f, + blas/ssyr2k.f, blas/stbsv.f, blas/strmm.f, blas/strmv.f, + blas/strsv.f, blas/scasum.f, blas/scnrm2.f, blas/caxpy.f, + blas/ccopy.f, blas/cdotc.f, blas/cdotu.f, blas/, blas/csrot.f, + blas/csscal.f, blas/cgemm.f, blas/cgemv.f, blas/cgerc.f, + blas/cgeru.f, blas/chemv.f, blas/cher.f, blas/cher2.f, + blas/cher2k.f, blas/cherk.f, blas/cscal.f, blas/cswap.f, + blas/ctbsv.f, blas/ctrmm.f, blas/ctrmv.f, blas/, blas/ctrsm.f, + blas/ctrsv.f: New files + * blas/Makefile.in (FSRC): Add them. + + * fftpack/zfftb.f, zfftb1.f, fftpack/zfftf.f, fftpack/zfftf1.f, + fftpack/zffti.f, fftpack/zffti1.f, fftpack/zpassb.f, + fftpack/zpassb2.f, fftpack/zpassb3.f, fftpack/zpassb4.f, + fftpack/zpassb5.f, fftpack/zpassf.f, fftpack/zpassf2.f, + fftpack/zpassf3.f, fftpack/zpassf4.f, fftpack/zpassf5.f: Rename + function (c -> z | add z). + * fftpack/cfftb.f, cfftb1.f, fftpack/cfftf.f, fftpack/cfftf1.f, + fftpack/cffti.f, fftpack/cffti1.f, fftpack/passb.f, + fftpack/passb2.f, fftpack/passb3.f, fftpack/passb4.f, + fftpack/passb5.f, fftpack/passf.f, fftpack/passf2.f, + fftpack/passf3.f, fftpack/passf4.f, fftpack/passf5.f: New files + for single precision. + * fftpack/Makefile.in (FSRC): Add new files. + + * lapack-xtra/xclange.f, lapack-xtra/xslamch.f, + lapack-xtra/xslange.f: New files. + * lapack-xtra/Makefile.in (FSRC): Add them. 
+ + * lapack/cbdsqr.f, lapack/csrscl.f, lapack/cgbcon.f, + lapack/cgbtf2.f, lapack/cgbtrf.f, lapack/cgbtrs.f, + lapack/cgebak.f, lapack/cgebal.f, lapack/cgebd2.f, + lapack/cgebrd.f, lapack/cgecon.f, lapack/cgeesx.f, lapack/cgeev.f, + lapack/cgehd2.f, lapack/cgehrd.f, lapack/cgelq2.f, + lapack/cgelqf.f, lapack/cgelsd.f, lapack/cgelss.f, + lapack/cgelsy.f, lapack/cgeqp3.f, lapack/cgeqpf.f, + lapack/cgeqr2.f, lapack/cgeqrf.f, lapack/cgesv.f, lapack/cgesvd.f, + lapack/cgetf2.f, lapack/cgetrf.f, lapack/cgetri.f, + lapack/cgetrs.f, lapack/cggbal.f, lapack/cgtsv.f, lapack/cgttrf.f, + lapack/cgttrs.f, lapack/cgtts2.f, lapack/cheev.f, lapack/chetd2.f, + lapack/chetrd.f, lapack/chseqr.f, lapack/clabrd.f, + lapack/clacgv.f, lapack/clacn2.f, lapack/clacon.f, + lapack/clacpy.f, lapack/cladiv.f, lapack/clahqr.f, + lapack/clahr2.f, lapack/clahrd.f, lapack/claic1.f, + lapack/clals0.f, lapack/clalsa.f, lapack/clalsd.f, + lapack/clange.f, lapack/clanhe.f, lapack/clanhs.f, + lapack/clantr.f, lapack/claqp2.f, lapack/claqps.f, + lapack/claqr0.f, lapack/claqr1.f, lapack/claqr2.f, + lapack/claqr3.f, lapack/claqr4.f, lapack/claqr5.f, lapack/clarf.f, + lapack/clarfb.f, lapack/clarfg.f, lapack/clarft.f, + lapack/clarfx.f, lapack/clartg.f, lapack/clarz.f, lapack/clarzb.f, + lapack/clarzt.f, lapack/clascl.f, lapack/claset.f, lapack/clasr.f, + lapack/classq.f, lapack/claswp.f, lapack/clatbs.f, + lapack/clatrd.f, lapack/clatrs.f, lapack/clatrz.f, + lapack/clauu2.f, lapack/clauum.f, lapack/cpbcon.f, + lapack/cpbtf2.f, lapack/cpbtrf.f, lapack/cpbtrs.f, + lapack/cpocon.f, lapack/cpotf2.f, lapack/cpotrf.f, + lapack/cpotri.f, lapack/cpotrs.f, lapack/cptsv.f, lapack/cpttrf.f, + lapack/cpttrs.f, lapack/cptts2.f, lapack/crot.f, lapack/csteqr.f, + lapack/ctrcon.f, lapack/ctrevc.f, lapack/ctrexc.f, + lapack/ctrsen.f, lapack/ctrsyl.f, lapack/ctrti2.f, + lapack/ctrtri.f, lapack/ctrtrs.f, lapack/ctzrzf.f, + lapack/cung2l.f, lapack/cung2r.f, lapack/cungbr.f, + lapack/cunghr.f, lapack/cungl2.f, 
lapack/cunglq.f, + lapack/cungql.f, lapack/cungqr.f, lapack/cungtr.f, + lapack/cunm2r.f, lapack/cunmbr.f, lapack/cunml2.f, + lapack/cunmlq.f, lapack/cunmqr.f, lapack/cunmr3.f, + lapack/cunmrz.f, lapack/sbdsqr.f, lapack/sgbcon.f, + lapack/sgbtf2.f, lapack/sgbtrf.f, lapack/sgbtrs.f, + lapack/sgebak.f, lapack/sgebal.f, lapack/sgebd2.f, + lapack/sgebrd.f, lapack/sgecon.f, lapack/sgeesx.f, lapack/sgeev.f, + lapack/sgehd2.f, lapack/sgehrd.f, lapack/sgelq2.f, + lapack/sgelqf.f, lapack/sgelsd.f, lapack/sgelss.f, + lapack/sgelsy.f, lapack/sgeqp3.f, lapack/sgeqpf.f, + lapack/sgeqr2.f, lapack/sgeqrf.f, lapack/sgesv.f, lapack/sgesvd.f, + lapack/sgetf2.f, lapack/sgetrf.f, lapack/sgetri.f, + lapack/sgetrs.f, lapack/sggbak.f, lapack/sggbal.f, + lapack/sgghrd.f, lapack/sgtsv.f, lapack/sgttrf.f, lapack/sgttrs.f, + lapack/sgtts2.f, lapack/shgeqz.f, lapack/shseqr.f, + lapack/slabad.f, lapack/slabrd.f, lapack/slacn2.f, + lapack/slacon.f, lapack/slacpy.f, lapack/sladiv.f, lapack/slae2.f, + lapack/slaed6.f, lapack/slaev2.f, lapack/slaexc.f, lapack/slag2.f, + lapack/slahqr.f, lapack/slahr2.f, lapack/slahrd.f, + lapack/slaic1.f, lapack/slaln2.f, lapack/slals0.f, + lapack/slalsa.f, lapack/slalsd.f, lapack/slamc1.f, + lapack/slamc2.f, lapack/slamc3.f, lapack/slamc4.f, + lapack/slamc5.f, lapack/slamch.f, lapack/slamrg.f, + lapack/slange.f, lapack/slanhs.f, lapack/slanst.f, + lapack/slansy.f, lapack/slantr.f, lapack/slanv2.f, + lapack/slapy2.f, lapack/slapy3.f, lapack/slaqp2.f, + lapack/slaqps.f, lapack/slaqr0.f, lapack/slaqr1.f, + lapack/slaqr2.f, lapack/slaqr3.f, lapack/slaqr4.f, + lapack/slaqr5.f, lapack/slarf.f, lapack/slarfb.f, lapack/slarfg.f, + lapack/slarft.f, lapack/slarfx.f, lapack/slartg.f, lapack/slarz.f, + lapack/slarzb.f, lapack/slarzt.f, lapack/slas2.f, lapack/slascl.f, + lapack/slasd0.f, lapack/slasd1.f, lapack/slasd2.f, + lapack/slasd3.f, lapack/slasd4.f, lapack/slasd5.f, + lapack/slasd6.f, lapack/slasd7.f, lapack/slasd8.f, + lapack/slasda.f, lapack/slasdq.f, lapack/slasdt.f, 
+ lapack/slaset.f, lapack/slasq1.f, lapack/slasq2.f, + lapack/slasq3.f, lapack/slasq4.f, lapack/slasq5.f, + lapack/slasq6.f, lapack/slasr.f, lapack/slasrt.f, lapack/slassq.f, + lapack/slasv2.f, lapack/slaswp.f, lapack/slasy2.f, + lapack/slatbs.f, lapack/slatrd.f, lapack/slatrs.f, + lapack/slatrz.f, lapack/slauu2.f, lapack/slauum.f, + lapack/slazq3.f, lapack/slazq4.f, lapack/sorg2l.f, + lapack/sorg2r.f, lapack/sorgbr.f, lapack/sorghr.f, + lapack/sorgl2.f, lapack/sorglq.f, lapack/sorgql.f, + lapack/sorgqr.f, lapack/sorgtr.f, lapack/sorm2r.f, + lapack/sormbr.f, lapack/sorml2.f, lapack/sormlq.f, + lapack/sormqr.f, lapack/sormr3.f, lapack/sormrz.f, + lapack/spbcon.f, lapack/spbtf2.f, lapack/spbtrf.f, + lapack/spbtrs.f, lapack/spocon.f, lapack/spotri.f, + lapack/spotrs.f, lapack/sptsv.f, lapack/spttrf.f, lapack/spttrs.f, + lapack/sptts2.f, lapack/srscl.f, lapack/ssteqr.f, lapack/ssterf.f, + lapack/ssyev.f, lapack/ssytd2.f, lapack/ssytrd.f, lapack/stgevc.f, + lapack/strcon.f, lapack/strevc.f, lapack/strexc.f, + lapack/strsen.f, lapack/strsyl.f, lapack/strti2.f, + lapack/strtri.f, lapack/strtrs.f, lapack/stzrzf.f, + lapack/scsum1.f: New files + * lapack/Makefile.in (FSRC): Add them. + + * misc/r1mach.f: New file + * misc/machar.cc: Modify to allow to be build twice, once for + double precision and once for single precision. + * misc/Makefile.in (FSRC): Add it. + (CEXTRA): Add smachar.c, and target for smachar.o + (MAKEDEPS): Include CEXTRA. + + * qrupdate/sch1up.f, qrupdate/cch1up.f, qrupdate/sqrinc.f, + qrupdate/cqrinc.f, qrupdate/sqrdec.f, qrupdate/cqrdec.f, + qrupdate/sqrinr.f, qrupdate/cqrinr.f, qrupdate/sqrder.f, + qrupdate/cqrder.f, qrupdate/sqrshc.f, qrupdate/cqrshc.f, + qrupdate/sqr1up.f, qrupdate/cqr1up.f, qrupdate/sch1dn.f, + qrupdate/cch1dn.f, qrupdate/schinx.f, qrupdate/cchinx.f, + qrupdate/schdex.f, qrupdate/cchdex.f, qrupdate/sqrqhu.f, + qrupdate/cqrqhu.f, qrupdate/sqrqhv.f, qrupdate/cqrqhv.f, + qrupdate/sqhqr.f, qrupdate/cqhqr.f: New files. 
+ * qrupdate/Makefile.in (FSRC): Add them. + + * slatec-fn/acosh.f, slatec-fn/albeta.f, slatec-fn/algams.f, + slatec-fn/alngam.f, slatec-fn/alnrel.f, slatec-fn/asinh.f, + slatec-fn/atanh.f, slatec-fn/betai.f, slatec-fn/csevl.f, + slatec-fn/erf.f, slatec-fn/erfc.f, slatec-fn/gami.f, + slatec-fn/gamit.f, slatec-fn/gamlim.f, slatec-fn/gamma.f, + slatec-fn/gamr.f, slatec-fn/inits.f, slatec-fn/pchim.f, + slatec-fn/pchst.f, slatec-fn/r9gmit.f, slatec-fn/r9lgic.f, + slatec-fn/r9lgit.f, slatec-fn/r9lgmc.f, slatec-fn/xacosh.f, + slatec-fn/xasinh.f, slatec-fn/xatanh.f, slatec-fn/xbetai.f, + slatec-fn/xerf.f, slatec-fn/xerfc.f, slatec-fn/xgamma.f, + slatec-fn/xsgmainc.f: New files. + * slatec-fn/Makefile.in (FSRC): Add them. + 2008-04-20 Jaroslav Hajek * qrupdate/dch1dn.f, qrupdate/dchdex.f, qrupdate/dchinx.f, diff -r 45f5faba05a2 -r 82be108cc558 libcruft/Makefile.in --- a/libcruft/Makefile.in Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/Makefile.in Sun Apr 27 22:34:17 2008 +0200 @@ -75,7 +75,7 @@ # FIXME -- this should build the shared library directly from # a normal archive file (created from PIC code, though). 
-MISC_OBJ := misc/machar.o misc/f77-extern.o \ +MISC_OBJ := misc/machar.o misc/smachar.o misc/f77-extern.o \ misc/f77-fcn.o misc/lo-error.o misc/quit.o misc/cquit.o CRUFT_FSRC := $(foreach dir, $(SUBDIRS), $(wildcard $(srcdir)/$(dir)/*.f)) diff -r 45f5faba05a2 -r 82be108cc558 libcruft/Makerules.in --- a/libcruft/Makerules.in Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/Makerules.in Sun Apr 27 22:34:17 2008 +0200 @@ -29,7 +29,7 @@ DLL_CXXDEFS = @CRUFT_DLL_DEFS@ CRUFT_FSRC = $(addprefix $(srcdir)/, $(FSRC)) -CRUFT_CSRC = $(addprefix $(srcdir)/, $(CSRC)) +CRUFT_CSRC = $(addprefix $(srcdir)/, $(CSRC) $(CEXTRA)) CRUFT_CXXSRC = $(addprefix $(srcdir)/, $(CXXSRC)) CRUFT_SRC = $(CRUFT_FSRC) $(CRUFT_CSRC) $(CRUFT_CXXSRC) @@ -61,9 +61,9 @@ CRUFT_FPICOBJ := $(CRUFT_FOBJ) endif ifdef CPICFLAG - CRUFT_CPICOBJ := $(addprefix pic/, $(CRUFT_COBJ)) + CRUFT_CPICOBJ := $(addprefix pic/, $(CRUFT_COBJ) $(CEXTRA)) else - CRUFT_CPICOBJ := $(CRUFT_COBJ) + CRUFT_CPICOBJ := $(CRUFT_COBJ) $(CEXTRA) endif ifdef CXXPICFLAG CRUFT_CXXPICOBJ := $(addprefix pic/, $(CRUFT_CXXOBJ)) diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/Makefile.in --- a/libcruft/amos/Makefile.in Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/amos/Makefile.in Sun Apr 27 22:34:17 2008 +0200 @@ -26,7 +26,11 @@ EXTERNAL_DISTFILES = $(DISTFILES) -FSRC = dgamln.f xzabs.f xzexp.f xzlog.f xzsqrt.f zacai.f zacon.f \ +FSRC = cacai.f cacon.f cbesh.f cbesi.f cbesj.f cbesk.f cbesy.f cbinu.f \ + cbuni.f cbunk.f cunk1.f cunk2.f crati.f cshch.f cuni1.f \ + cuoik.f cairy.f cbiry.f ckscl.f cs1s2.f cuchk.f cuni2.f cwrsk.f \ + casyi.f cbknu.f cmlri.f cseri.f cunhj.f cunik.f dgamln.f gamln.f \ + xzabs.f xzexp.f xzlog.f xzsqrt.f zacai.f zacon.f \ zairy.f zasyi.f zbesh.f zbesi.f zbesj.f zbesk.f zbesy.f zbinu.f \ zbiry.f zbknu.f zbuni.f zbunk.f zdiv.f zkscl.f zmlri.f zmlt.f \ zrati.f zs1s2.f zseri.f zshch.f zuchk.f zunhj.f zuni1.f zuni2.f \ diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cacai.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 
+++ b/libcruft/amos/cacai.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,90 @@ + SUBROUTINE CACAI(Z, FNU, KODE, MR, N, Y, NZ, RL, TOL, ELIM, ALIM) +C***BEGIN PROLOGUE CACAI +C***REFER TO CAIRY +C +C CACAI APPLIES THE ANALYTIC CONTINUATION FORMULA +C +C K(FNU,ZN*EXP(MP))=K(FNU,ZN)*EXP(-MP*FNU) - MP*I(FNU,ZN) +C MP=PI*MR*CMPLX(0.0,1.0) +C +C TO CONTINUE THE K FUNCTION FROM THE RIGHT HALF TO THE LEFT +C HALF Z PLANE FOR USE WITH CAIRY WHERE FNU=1/3 OR 2/3 AND N=1. +C CACAI IS THE SAME AS CACON WITH THE PARTS FOR LARGER ORDERS AND +C RECURRENCE REMOVED. A RECURSIVE CALL TO CACON CAN RESULT IF CACON +C IS CALLED FROM CAIRY. +C +C***ROUTINES CALLED CASYI,CBKNU,CMLRI,CSERI,CS1S2,R1MACH +C***END PROLOGUE CACAI + COMPLEX CSGN, CSPN, C1, C2, Y, Z, ZN, CY + REAL ALIM, ARG, ASCLE, AZ, CPN, DFNU, ELIM, FMR, FNU, PI, RL, + * SGN, SPN, TOL, YY, R1MACH + INTEGER INU, IUF, KODE, MR, N, NN, NW, NZ + DIMENSION Y(N), CY(2) + DATA PI / 3.14159265358979324E0 / + NZ = 0 + ZN = -Z + AZ = CABS(Z) + NN = N + DFNU = FNU + FLOAT(N-1) + IF (AZ.LE.2.0E0) GO TO 10 + IF (AZ*AZ*0.25E0.GT.DFNU+1.0E0) GO TO 20 + 10 CONTINUE +C----------------------------------------------------------------------- +C POWER SERIES FOR THE I FUNCTION +C----------------------------------------------------------------------- + CALL CSERI(ZN, FNU, KODE, NN, Y, NW, TOL, ELIM, ALIM) + GO TO 40 + 20 CONTINUE + IF (AZ.LT.RL) GO TO 30 +C----------------------------------------------------------------------- +C ASYMPTOTIC EXPANSION FOR LARGE Z FOR THE I FUNCTION +C----------------------------------------------------------------------- + CALL CASYI(ZN, FNU, KODE, NN, Y, NW, RL, TOL, ELIM, ALIM) + IF (NW.LT.0) GO TO 70 + GO TO 40 + 30 CONTINUE +C----------------------------------------------------------------------- +C MILLER ALGORITHM NORMALIZED BY THE SERIES FOR THE I FUNCTION +C----------------------------------------------------------------------- + CALL CMLRI(ZN, FNU, KODE, NN, Y, NW, TOL) + IF(NW.LT.0) GO TO 70 + 40 CONTINUE 
+C----------------------------------------------------------------------- +C ANALYTIC CONTINUATION TO THE LEFT HALF PLANE FOR THE K FUNCTION +C----------------------------------------------------------------------- + CALL CBKNU(ZN, FNU, KODE, 1, CY, NW, TOL, ELIM, ALIM) + IF (NW.NE.0) GO TO 70 + FMR = FLOAT(MR) + SGN = -SIGN(PI,FMR) + CSGN = CMPLX(0.0E0,SGN) + IF (KODE.EQ.1) GO TO 50 + YY = -AIMAG(ZN) + CPN = COS(YY) + SPN = SIN(YY) + CSGN = CSGN*CMPLX(CPN,SPN) + 50 CONTINUE +C----------------------------------------------------------------------- +C CALCULATE CSPN=EXP(FNU*PI*I) TO MINIMIZE LOSSES OF SIGNIFICANCE +C WHEN FNU IS LARGE +C----------------------------------------------------------------------- + INU = INT(FNU) + ARG = (FNU-FLOAT(INU))*SGN + CPN = COS(ARG) + SPN = SIN(ARG) + CSPN = CMPLX(CPN,SPN) + IF (MOD(INU,2).EQ.1) CSPN = -CSPN + C1 = CY(1) + C2 = Y(1) + IF (KODE.EQ.1) GO TO 60 + IUF = 0 + ASCLE = 1.0E+3*R1MACH(1)/TOL + CALL CS1S2(ZN, C1, C2, NW, ASCLE, ALIM, IUF) + NZ = NZ + NW + 60 CONTINUE + Y(1) = CSPN*C1 + CSGN*C2 + RETURN + 70 CONTINUE + NZ = -1 + IF(NW.EQ.(-2)) NZ=-2 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cacon.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cacon.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,149 @@ + SUBROUTINE CACON(Z, FNU, KODE, MR, N, Y, NZ, RL, FNUL, TOL, ELIM, + * ALIM) +C***BEGIN PROLOGUE CACON +C***REFER TO CBESK,CBESH +C +C CACON APPLIES THE ANALYTIC CONTINUATION FORMULA +C +C K(FNU,ZN*EXP(MP))=K(FNU,ZN)*EXP(-MP*FNU) - MP*I(FNU,ZN) +C MP=PI*MR*CMPLX(0.0,1.0) +C +C TO CONTINUE THE K FUNCTION FROM THE RIGHT HALF TO THE LEFT +C HALF Z PLANE +C +C***ROUTINES CALLED CBINU,CBKNU,CS1S2,R1MACH +C***END PROLOGUE CACON + COMPLEX CK, CONE, CS, CSCL, CSCR, CSGN, CSPN, CSS, CSR, C1, C2, + * RZ, SC1, SC2, ST, S1, S2, Y, Z, ZN, CY + REAL ALIM, ARG, ASCLE, AS2, BSCLE, BRY, CPN, C1I, C1M, C1R, ELIM, + * FMR, FNU, FNUL, PI, RL, SGN, SPN, TOL, YY, R1MACH + INTEGER I, INU, IUF, KFLAG, KODE, 
MR, N, NN, NW, NZ + DIMENSION Y(N), CY(2), CSS(3), CSR(3), BRY(3) + DATA PI / 3.14159265358979324E0 / + DATA CONE / (1.0E0,0.0E0) / + NZ = 0 + ZN = -Z + NN = N + CALL CBINU(ZN, FNU, KODE, NN, Y, NW, RL, FNUL, TOL, ELIM, ALIM) + IF (NW.LT.0) GO TO 80 +C----------------------------------------------------------------------- +C ANALYTIC CONTINUATION TO THE LEFT HALF PLANE FOR THE K FUNCTION +C----------------------------------------------------------------------- + NN = MIN0(2,N) + CALL CBKNU(ZN, FNU, KODE, NN, CY, NW, TOL, ELIM, ALIM) + IF (NW.NE.0) GO TO 80 + S1 = CY(1) + FMR = FLOAT(MR) + SGN = -SIGN(PI,FMR) + CSGN = CMPLX(0.0E0,SGN) + IF (KODE.EQ.1) GO TO 10 + YY = -AIMAG(ZN) + CPN = COS(YY) + SPN = SIN(YY) + CSGN = CSGN*CMPLX(CPN,SPN) + 10 CONTINUE +C----------------------------------------------------------------------- +C CALCULATE CSPN=EXP(FNU*PI*I) TO MINIMIZE LOSSES OF SIGNIFICANCE +C WHEN FNU IS LARGE +C----------------------------------------------------------------------- + INU = INT(FNU) + ARG = (FNU-FLOAT(INU))*SGN + CPN = COS(ARG) + SPN = SIN(ARG) + CSPN = CMPLX(CPN,SPN) + IF (MOD(INU,2).EQ.1) CSPN = -CSPN + IUF = 0 + C1 = S1 + C2 = Y(1) + ASCLE = 1.0E+3*R1MACH(1)/TOL + IF (KODE.EQ.1) GO TO 20 + CALL CS1S2(ZN, C1, C2, NW, ASCLE, ALIM, IUF) + NZ = NZ + NW + SC1 = C1 + 20 CONTINUE + Y(1) = CSPN*C1 + CSGN*C2 + IF (N.EQ.1) RETURN + CSPN = -CSPN + S2 = CY(2) + C1 = S2 + C2 = Y(2) + IF (KODE.EQ.1) GO TO 30 + CALL CS1S2(ZN, C1, C2, NW, ASCLE, ALIM, IUF) + NZ = NZ + NW + SC2 = C1 + 30 CONTINUE + Y(2) = CSPN*C1 + CSGN*C2 + IF (N.EQ.2) RETURN + CSPN = -CSPN + RZ = CMPLX(2.0E0,0.0E0)/ZN + CK = CMPLX(FNU+1.0E0,0.0E0)*RZ +C----------------------------------------------------------------------- +C SCALE NEAR EXPONENT EXTREMES DURING RECURRENCE ON K FUNCTIONS +C----------------------------------------------------------------------- + CSCL = CMPLX(1.0E0/TOL,0.0E0) + CSCR = CMPLX(TOL,0.0E0) + CSS(1) = CSCL + CSS(2) = CONE + CSS(3) = CSCR + CSR(1) = CSCR + CSR(2) = CONE 
+ CSR(3) = CSCL + BRY(1) = ASCLE + BRY(2) = 1.0E0/ASCLE + BRY(3) = R1MACH(2) + AS2 = CABS(S2) + KFLAG = 2 + IF (AS2.GT.BRY(1)) GO TO 40 + KFLAG = 1 + GO TO 50 + 40 CONTINUE + IF (AS2.LT.BRY(2)) GO TO 50 + KFLAG = 3 + 50 CONTINUE + BSCLE = BRY(KFLAG) + S1 = S1*CSS(KFLAG) + S2 = S2*CSS(KFLAG) + CS = CSR(KFLAG) + DO 70 I=3,N + ST = S2 + S2 = CK*S2 + S1 + S1 = ST + C1 = S2*CS + ST = C1 + C2 = Y(I) + IF (KODE.EQ.1) GO TO 60 + IF (IUF.LT.0) GO TO 60 + CALL CS1S2(ZN, C1, C2, NW, ASCLE, ALIM, IUF) + NZ = NZ + NW + SC1 = SC2 + SC2 = C1 + IF (IUF.NE.3) GO TO 60 + IUF = -4 + S1 = SC1*CSS(KFLAG) + S2 = SC2*CSS(KFLAG) + ST = SC2 + 60 CONTINUE + Y(I) = CSPN*C1 + CSGN*C2 + CK = CK + RZ + CSPN = -CSPN + IF (KFLAG.GE.3) GO TO 70 + C1R = REAL(C1) + C1I = AIMAG(C1) + C1R = ABS(C1R) + C1I = ABS(C1I) + C1M = AMAX1(C1R,C1I) + IF (C1M.LE.BSCLE) GO TO 70 + KFLAG = KFLAG + 1 + BSCLE = BRY(KFLAG) + S1 = S1*CS + S2 = ST + S1 = S1*CSS(KFLAG) + S2 = S2*CSS(KFLAG) + CS = CSR(KFLAG) + 70 CONTINUE + RETURN + 80 CONTINUE + NZ = -1 + IF(NW.EQ.(-2)) NZ=-2 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cairy.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cairy.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,336 @@ + SUBROUTINE CAIRY(Z, ID, KODE, AI, NZ, IERR) +C***BEGIN PROLOGUE CAIRY +C***DATE WRITTEN 830501 (YYMMDD) +C***REVISION DATE 890801 (YYMMDD) +C***CATEGORY NO. B5K +C***KEYWORDS AIRY FUNCTION,BESSEL FUNCTIONS OF ORDER ONE THIRD +C***AUTHOR AMOS, DONALD E., SANDIA NATIONAL LABORATORIES +C***PURPOSE TO COMPUTE AIRY FUNCTIONS AI(Z) AND DAI(Z) FOR COMPLEX Z +C***DESCRIPTION +C +C ON KODE=1, CAIRY COMPUTES THE COMPLEX AIRY FUNCTION AI(Z) OR +C ITS DERIVATIVE DAI(Z)/DZ ON ID=0 OR ID=1 RESPECTIVELY. 
ON +C KODE=2, A SCALING OPTION CEXP(ZTA)*AI(Z) OR CEXP(ZTA)* +C DAI(Z)/DZ IS PROVIDED TO REMOVE THE EXPONENTIAL DECAY IN +C -PI/3.LT.ARG(Z).LT.PI/3 AND THE EXPONENTIAL GROWTH IN +C PI/3.LT.ABS(ARG(Z)).LT.PI WHERE ZTA=(2/3)*Z*CSQRT(Z) +C +C WHILE THE AIRY FUNCTIONS AI(Z) AND DAI(Z)/DZ ARE ANALYTIC IN +C THE WHOLE Z PLANE, THE CORRESPONDING SCALED FUNCTIONS DEFINED +C FOR KODE=2 HAVE A CUT ALONG THE NEGATIVE REAL AXIS. +C DEFINITIONS AND NOTATION ARE FOUND IN THE NBS HANDBOOK OF +C MATHEMATICAL FUNCTIONS (REF. 1). +C +C INPUT +C Z - Z=CMPLX(X,Y) +C ID - ORDER OF DERIVATIVE, ID=0 OR ID=1 +C KODE - A PARAMETER TO INDICATE THE SCALING OPTION +C KODE= 1 RETURNS +C AI=AI(Z) ON ID=0 OR +C AI=DAI(Z)/DZ ON ID=1 +C = 2 RETURNS +C AI=CEXP(ZTA)*AI(Z) ON ID=0 OR +C AI=CEXP(ZTA)*DAI(Z)/DZ ON ID=1 WHERE +C ZTA=(2/3)*Z*CSQRT(Z) +C +C OUTPUT +C AI - COMPLEX ANSWER DEPENDING ON THE CHOICES FOR ID AND +C KODE +C NZ - UNDERFLOW INDICATOR +C NZ= 0 , NORMAL RETURN +C NZ= 1 , AI=CMPLX(0.0,0.0) DUE TO UNDERFLOW IN +C -PI/3.LT.ARG(Z).LT.PI/3 ON KODE=1 +C IERR - ERROR FLAG +C IERR=0, NORMAL RETURN - COMPUTATION COMPLETED +C IERR=1, INPUT ERROR - NO COMPUTATION +C IERR=2, OVERFLOW - NO COMPUTATION, REAL(ZTA) +C TOO LARGE WITH KODE=1. +C IERR=3, CABS(Z) LARGE - COMPUTATION COMPLETED +C LOSSES OF SIGNIFCANCE BY ARGUMENT REDUCTION +C PRODUCE LESS THAN HALF OF MACHINE ACCURACY +C IERR=4, CABS(Z) TOO LARGE - NO COMPUTATION +C COMPLETE LOSS OF ACCURACY BY ARGUMENT +C REDUCTION +C IERR=5, ERROR - NO COMPUTATION, +C ALGORITHM TERMINATION CONDITION NOT MET +C +C +C***LONG DESCRIPTION +C +C AI AND DAI ARE COMPUTED FOR CABS(Z).GT.1.0 FROM THE K BESSEL +C FUNCTIONS BY +C +C AI(Z)=C*SQRT(Z)*K(1/3,ZTA) , DAI(Z)=-C*Z*K(2/3,ZTA) +C C=1.0/(PI*SQRT(3.0)) +C ZTA=(2/3)*Z**(3/2) +C +C WITH THE POWER SERIES FOR CABS(Z).LE.1.0. +C +C IN MOST COMPLEX VARIABLE COMPUTATION, ONE MUST EVALUATE ELE- +C MENTARY FUNCTIONS. WHEN THE MAGNITUDE OF Z IS LARGE, LOSSES +C OF SIGNIFICANCE BY ARGUMENT REDUCTION OCCUR. 
CONSEQUENTLY, IF +C THE MAGNITUDE OF ZETA=(2/3)*Z**1.5 EXCEEDS U1=SQRT(0.5/UR), +C THEN LOSSES EXCEEDING HALF PRECISION ARE LIKELY AND AN ERROR +C FLAG IERR=3 IS TRIGGERED WHERE UR=R1MACH(4)=UNIT ROUNDOFF. +C ALSO, IF THE MAGNITUDE OF ZETA IS LARGER THAN U2=0.5/UR, THEN +C ALL SIGNIFICANCE IS LOST AND IERR=4. IN ORDER TO USE THE INT +C FUNCTION, ZETA MUST BE FURTHER RESTRICTED NOT TO EXCEED THE +C LARGEST INTEGER, U3=I1MACH(9). THUS, THE MAGNITUDE OF ZETA +C MUST BE RESTRICTED BY MIN(U2,U3). ON 32 BIT MACHINES, U1,U2, +C AND U3 ARE APPROXIMATELY 2.0E+3, 4.2E+6, 2.1E+9 IN SINGLE +C PRECISION ARITHMETIC AND 1.3E+8, 1.8E+16, 2.1E+9 IN DOUBLE +C PRECISION ARITHMETIC RESPECTIVELY. THIS MAKES U2 AND U3 LIMIT- +C ING IN THEIR RESPECTIVE ARITHMETICS. THIS MEANS THAT THE MAG- +C NITUDE OF Z CANNOT EXCEED 3.1E+4 IN SINGLE AND 2.1E+6 IN +C DOUBLE PRECISION ARITHMETIC. THIS ALSO MEANS THAT ONE CAN +C EXPECT TO RETAIN, IN THE WORST CASES ON 32 BIT MACHINES, +C NO DIGITS IN SINGLE PRECISION AND ONLY 7 DIGITS IN DOUBLE +C PRECISION ARITHMETIC. SIMILAR CONSIDERATIONS HOLD FOR OTHER +C MACHINES. +C +C THE APPROXIMATE RELATIVE ERROR IN THE MAGNITUDE OF A COMPLEX +C BESSEL FUNCTION CAN BE EXPRESSED BY P*10**S WHERE P=MAX(UNIT +C ROUNDOFF,1.0E-18) IS THE NOMINAL PRECISION AND 10**S REPRE- +C SENTS THE INCREASE IN ERROR DUE TO ARGUMENT REDUCTION IN THE +C ELEMENTARY FUNCTIONS. HERE, S=MAX(1,ABS(LOG10(CABS(Z))), +C ABS(LOG10(FNU))) APPROXIMATELY (I.E. S=MAX(1,ABS(EXPONENT OF +C CABS(Z),ABS(EXPONENT OF FNU)) ). HOWEVER, THE PHASE ANGLE MAY +C HAVE ONLY ABSOLUTE ACCURACY. THIS IS MOST LIKELY TO OCCUR WHEN +C ONE COMPONENT (IN ABSOLUTE VALUE) IS LARGER THAN THE OTHER BY +C SEVERAL ORDERS OF MAGNITUDE. IF ONE COMPONENT IS 10**K LARGER +C THAN THE OTHER, THEN ONE CAN EXPECT ONLY MAX(ABS(LOG10(P))-K, +C 0) SIGNIFICANT DIGITS; OR, STATED ANOTHER WAY, WHEN K EXCEEDS +C THE EXPONENT OF P, NO SIGNIFICANT DIGITS REMAIN IN THE SMALLER +C COMPONENT. 
HOWEVER, THE PHASE ANGLE RETAINS ABSOLUTE ACCURACY +C BECAUSE, IN COMPLEX ARITHMETIC WITH PRECISION P, THE SMALLER +C COMPONENT WILL NOT (AS A RULE) DECREASE BELOW P TIMES THE +C MAGNITUDE OF THE LARGER COMPONENT. IN THESE EXTREME CASES, +C THE PRINCIPAL PHASE ANGLE IS ON THE ORDER OF +P, -P, PI/2-P, +C OR -PI/2+P. +C +C***REFERENCES HANDBOOK OF MATHEMATICAL FUNCTIONS BY M. ABRAMOWITZ +C AND I. A. STEGUN, NBS AMS SERIES 55, U.S. DEPT. OF +C COMMERCE, 1955. +C +C COMPUTATION OF BESSEL FUNCTIONS OF COMPLEX ARGUMENT +C AND LARGE ORDER BY D. E. AMOS, SAND83-0643, MAY, 1983 +C +C A SUBROUTINE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX +C ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, SAND85- +C 1018, MAY, 1985 +C +C A PORTABLE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX +C ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, TRANS. +C MATH. SOFTWARE, 1986 +C +C***ROUTINES CALLED CACAI,CBKNU,I1MACH,R1MACH +C***END PROLOGUE CAIRY + COMPLEX AI, CONE, CSQ, CY, S1, S2, TRM1, TRM2, Z, ZTA, Z3 + REAL AA, AD, AK, ALIM, ATRM, AZ, AZ3, BK, CK, COEF, C1, C2, DIG, + * DK, D1, D2, ELIM, FID, FNU, RL, R1M5, SFAC, TOL, TTH, ZI, ZR, + * Z3I, Z3R, R1MACH, BB, ALAZ + INTEGER ID, IERR, IFLAG, K, KODE, K1, K2, MR, NN, NZ, I1MACH + DIMENSION CY(1) + DATA TTH, C1, C2, COEF /6.66666666666666667E-01, + * 3.55028053887817240E-01,2.58819403792806799E-01, + * 1.83776298473930683E-01/ + DATA CONE / (1.0E0,0.0E0) / +C***FIRST EXECUTABLE STATEMENT CAIRY + IERR = 0 + NZ=0 + IF (ID.LT.0 .OR. ID.GT.1) IERR=1 + IF (KODE.LT.1 .OR. KODE.GT.2) IERR=1 + IF (IERR.NE.0) RETURN + AZ = CABS(Z) + TOL = AMAX1(R1MACH(4),1.0E-18) + FID = FLOAT(ID) + IF (AZ.GT.1.0E0) GO TO 60 +C----------------------------------------------------------------------- +C POWER SERIES FOR CABS(Z).LE.1. 
+C----------------------------------------------------------------------- + S1 = CONE + S2 = CONE + IF (AZ.LT.TOL) GO TO 160 + AA = AZ*AZ + IF (AA.LT.TOL/AZ) GO TO 40 + TRM1 = CONE + TRM2 = CONE + ATRM = 1.0E0 + Z3 = Z*Z*Z + AZ3 = AZ*AA + AK = 2.0E0 + FID + BK = 3.0E0 - FID - FID + CK = 4.0E0 - FID + DK = 3.0E0 + FID + FID + D1 = AK*DK + D2 = BK*CK + AD = AMIN1(D1,D2) + AK = 24.0E0 + 9.0E0*FID + BK = 30.0E0 - 9.0E0*FID + Z3R = REAL(Z3) + Z3I = AIMAG(Z3) + DO 30 K=1,25 + TRM1 = TRM1*CMPLX(Z3R/D1,Z3I/D1) + S1 = S1 + TRM1 + TRM2 = TRM2*CMPLX(Z3R/D2,Z3I/D2) + S2 = S2 + TRM2 + ATRM = ATRM*AZ3/AD + D1 = D1 + AK + D2 = D2 + BK + AD = AMIN1(D1,D2) + IF (ATRM.LT.TOL*AD) GO TO 40 + AK = AK + 18.0E0 + BK = BK + 18.0E0 + 30 CONTINUE + 40 CONTINUE + IF (ID.EQ.1) GO TO 50 + AI = S1*CMPLX(C1,0.0E0) - Z*S2*CMPLX(C2,0.0E0) + IF (KODE.EQ.1) RETURN + ZTA = Z*CSQRT(Z)*CMPLX(TTH,0.0E0) + AI = AI*CEXP(ZTA) + RETURN + 50 CONTINUE + AI = -S2*CMPLX(C2,0.0E0) + IF (AZ.GT.TOL) AI = AI + Z*Z*S1*CMPLX(C1/(1.0E0+FID),0.0E0) + IF (KODE.EQ.1) RETURN + ZTA = Z*CSQRT(Z)*CMPLX(TTH,0.0E0) + AI = AI*CEXP(ZTA) + RETURN +C----------------------------------------------------------------------- +C CASE FOR CABS(Z).GT.1.0 +C----------------------------------------------------------------------- + 60 CONTINUE + FNU = (1.0E0+FID)/3.0E0 +C----------------------------------------------------------------------- +C SET PARAMETERS RELATED TO MACHINE CONSTANTS. +C TOL IS THE APPROXIMATE UNIT ROUNDOFF LIMITED TO 1.0E-18. +C ELIM IS THE APPROXIMATE EXPONENTIAL OVER- AND UNDERFLOW LIMIT. +C EXP(-ELIM).LT.EXP(-ALIM)=EXP(-ELIM)/TOL AND +C EXP(ELIM).GT.EXP(ALIM)=EXP(ELIM)*TOL ARE INTERVALS NEAR +C UNDERFLOW AND OVERFLOW LIMITS WHERE SCALED ARITHMETIC IS DONE. +C RL IS THE LOWER BOUNDARY OF THE ASYMPTOTIC EXPANSION FOR LARGE Z. +C DIG = NUMBER OF BASE 10 DIGITS IN TOL = 10**(-DIG). 
+C----------------------------------------------------------------------- + K1 = I1MACH(12) + K2 = I1MACH(13) + R1M5 = R1MACH(5) + K = MIN0(IABS(K1),IABS(K2)) + ELIM = 2.303E0*(FLOAT(K)*R1M5-3.0E0) + K1 = I1MACH(11) - 1 + AA = R1M5*FLOAT(K1) + DIG = AMIN1(AA,18.0E0) + AA = AA*2.303E0 + ALIM = ELIM + AMAX1(-AA,-41.45E0) + RL = 1.2E0*DIG + 3.0E0 + ALAZ=ALOG(AZ) +C----------------------------------------------------------------------- +C TEST FOR RANGE +C----------------------------------------------------------------------- + AA=0.5E0/TOL + BB=FLOAT(I1MACH(9))*0.5E0 + AA=AMIN1(AA,BB) + AA=AA**TTH + IF (AZ.GT.AA) GO TO 260 + AA=SQRT(AA) + IF (AZ.GT.AA) IERR=3 + CSQ=CSQRT(Z) + ZTA=Z*CSQ*CMPLX(TTH,0.0E0) +C----------------------------------------------------------------------- +C RE(ZTA).LE.0 WHEN RE(Z).LT.0, ESPECIALLY WHEN IM(Z) IS SMALL +C----------------------------------------------------------------------- + IFLAG = 0 + SFAC = 1.0E0 + ZI = AIMAG(Z) + ZR = REAL(Z) + AK = AIMAG(ZTA) + IF (ZR.GE.0.0E0) GO TO 70 + BK = REAL(ZTA) + CK = -ABS(BK) + ZTA = CMPLX(CK,AK) + 70 CONTINUE + IF (ZI.NE.0.0E0) GO TO 80 + IF (ZR.GT.0.0E0) GO TO 80 + ZTA = CMPLX(0.0E0,AK) + 80 CONTINUE + AA = REAL(ZTA) + IF (AA.GE.0.0E0 .AND. 
ZR.GT.0.0E0) GO TO 100 + IF (KODE.EQ.2) GO TO 90 +C----------------------------------------------------------------------- +C OVERFLOW TEST +C----------------------------------------------------------------------- + IF (AA.GT.(-ALIM)) GO TO 90 + AA = -AA + 0.25E0*ALAZ + IFLAG = 1 + SFAC = TOL + IF (AA.GT.ELIM) GO TO 240 + 90 CONTINUE +C----------------------------------------------------------------------- +C CBKNU AND CACAI RETURN EXP(ZTA)*K(FNU,ZTA) ON KODE=2 +C----------------------------------------------------------------------- + MR = 1 + IF (ZI.LT.0.0E0) MR = -1 + CALL CACAI(ZTA, FNU, KODE, MR, 1, CY, NN, RL, TOL, ELIM, ALIM) + IF (NN.LT.0) GO TO 250 + NZ = NZ + NN + GO TO 120 + 100 CONTINUE + IF (KODE.EQ.2) GO TO 110 +C----------------------------------------------------------------------- +C UNDERFLOW TEST +C----------------------------------------------------------------------- + IF (AA.LT.ALIM) GO TO 110 + AA = -AA - 0.25E0*ALAZ + IFLAG = 2 + SFAC = 1.0E0/TOL + IF (AA.LT.(-ELIM)) GO TO 180 + 110 CONTINUE + CALL CBKNU(ZTA, FNU, KODE, 1, CY, NZ, TOL, ELIM, ALIM) + 120 CONTINUE + S1 = CY(1)*CMPLX(COEF,0.0E0) + IF (IFLAG.NE.0) GO TO 140 + IF (ID.EQ.1) GO TO 130 + AI = CSQ*S1 + RETURN + 130 AI = -Z*S1 + RETURN + 140 CONTINUE + S1 = S1*CMPLX(SFAC,0.0E0) + IF (ID.EQ.1) GO TO 150 + S1 = S1*CSQ + AI = S1*CMPLX(1.0E0/SFAC,0.0E0) + RETURN + 150 CONTINUE + S1 = -S1*Z + AI = S1*CMPLX(1.0E0/SFAC,0.0E0) + RETURN + 160 CONTINUE + AA = 1.0E+3*R1MACH(1) + S1 = CMPLX(0.0E0,0.0E0) + IF (ID.EQ.1) GO TO 170 + IF (AZ.GT.AA) S1 = CMPLX(C2,0.0E0)*Z + AI = CMPLX(C1,0.0E0) - S1 + RETURN + 170 CONTINUE + AI = -CMPLX(C2,0.0E0) + AA = SQRT(AA) + IF (AZ.GT.AA) S1 = Z*Z*CMPLX(0.5E0,0.0E0) + AI = AI + S1*CMPLX(C1,0.0E0) + RETURN + 180 CONTINUE + NZ = 1 + AI = CMPLX(0.0E0,0.0E0) + RETURN + 240 CONTINUE + NZ = 0 + IERR=2 + RETURN + 250 CONTINUE + IF(NN.EQ.(-1)) GO TO 240 + NZ=0 + IERR=5 + RETURN + 260 CONTINUE + IERR=4 + NZ=0 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 
libcruft/amos/casyi.f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcruft/amos/casyi.f	Sun Apr 27 22:34:17 2008 +0200
@@ -0,0 +1,126 @@
+      SUBROUTINE CASYI(Z, FNU, KODE, N, Y, NZ, RL, TOL, ELIM, ALIM)
+C***BEGIN PROLOGUE CASYI
+C***REFER TO CBESI,CBESK
+C
+C     CASYI COMPUTES THE I BESSEL FUNCTION FOR REAL(Z).GE.0.0 BY
+C     MEANS OF THE ASYMPTOTIC EXPANSION FOR LARGE CABS(Z) IN THE
+C     REGION CABS(Z).GT.MAX(RL,FNU*FNU/2). NZ=0 IS A NORMAL RETURN.
+C     NZ.LT.0 INDICATES AN OVERFLOW ON KODE=1.
+C
+C***ROUTINES CALLED R1MACH
+C***END PROLOGUE CASYI
+      COMPLEX AK1, CK, CONE, CS1, CS2, CZ, CZERO, DK, EZ, P1, RZ, S2,
+     * Y, Z
+      REAL AA, ACZ, AEZ, AK, ALIM, ARG, ARM, ATOL, AZ, BB, BK, DFNU,
+     * DNU2, ELIM, FDN, FNU, PI, RL, RTPI, RTR1, S, SGN, SQK, TOL, X,
+     * YY, R1MACH
+      INTEGER I, IB, IL, INU, J, JL, K, KODE, KODED, M, N, NN, NZ
+      DIMENSION Y(N)
+      DATA PI, RTPI /3.14159265358979324E0 , 0.159154943091895336E0 /
+      DATA CZERO, CONE / (0.0E0,0.0E0), (1.0E0,0.0E0) /
+C
+      NZ = 0
+      AZ = CABS(Z)
+      X = REAL(Z)
+      ARM = 1.0E+3*R1MACH(1)
+      RTR1 = SQRT(ARM)
+      IL = MIN0(2,N)
+      DFNU = FNU + FLOAT(N-IL)
+C-----------------------------------------------------------------------
+C     OVERFLOW TEST (ABS(REAL(CZ)).GT.ELIM FORCES THE NZ=-1 RETURN)
+C-----------------------------------------------------------------------
+      AK1 = CMPLX(RTPI,0.0E0)/Z
+      AK1 = CSQRT(AK1)
+      CZ = Z
+      IF (KODE.EQ.2) CZ = Z - CMPLX(X,0.0E0)
+      ACZ = REAL(CZ)
+      IF (ABS(ACZ).GT.ELIM) GO TO 80
+      DNU2 = DFNU + DFNU
+      KODED = 1
+      IF ((ABS(ACZ).GT.ALIM) .AND. (N.GT.2)) GO TO 10
+      KODED = 0
+      AK1 = AK1*CEXP(CZ)
+   10 CONTINUE
+      FDN = 0.0E0
+      IF (DNU2.GT.RTR1) FDN = DNU2*DNU2
+      EZ = Z*CMPLX(8.0E0,0.0E0)
+C-----------------------------------------------------------------------
+C     WHEN Z IS IMAGINARY, THE ERROR TEST MUST BE MADE RELATIVE TO THE
+C     FIRST RECIPROCAL POWER SINCE THIS IS THE LEADING TERM OF THE
+C     EXPANSION FOR THE IMAGINARY PART.
+C-----------------------------------------------------------------------
+      AEZ = 8.0E0*AZ
+      S = TOL/AEZ
+      JL = INT(RL+RL) + 2
+      YY = AIMAG(Z)
+      P1 = CZERO
+      IF (YY.EQ.0.0E0) GO TO 20
+C-----------------------------------------------------------------------
+C     CALCULATE EXP(PI*(0.5+FNU+N-IL)*I) TO MINIMIZE LOSSES OF
+C     SIGNIFICANCE WHEN FNU OR N IS LARGE
+C-----------------------------------------------------------------------
+      INU = INT(FNU)
+      ARG = (FNU-FLOAT(INU))*PI
+      INU = INU + N - IL
+      AK = -SIN(ARG)
+      BK = COS(ARG)
+      IF (YY.LT.0.0E0) BK = -BK
+      P1 = CMPLX(AK,BK)
+      IF (MOD(INU,2).EQ.1) P1 = -P1
+   20 CONTINUE
+      DO 50 K=1,IL
+      SQK = FDN - 1.0E0
+      ATOL = S*ABS(SQK)
+      SGN = 1.0E0
+      CS1 = CONE
+      CS2 = CONE
+      CK = CONE
+      AK = 0.0E0
+      AA = 1.0E0
+      BB = AEZ
+      DK = EZ
+      DO 30 J=1,JL
+      CK = CK*CMPLX(SQK,0.0E0)/DK
+      CS2 = CS2 + CK
+      SGN = -SGN
+      CS1 = CS1 + CK*CMPLX(SGN,0.0E0)
+      DK = DK + EZ
+      AA = AA*ABS(SQK)/BB
+      BB = BB + AEZ
+      AK = AK + 8.0E0
+      SQK = SQK - AK
+      IF (AA.LE.ATOL) GO TO 40
+   30 CONTINUE
+      GO TO 90
+   40 CONTINUE
+      S2 = CS1
+      IF (X+X.LT.ELIM) S2 = S2 + P1*CS2*CEXP(-Z-Z)
+      FDN = FDN + 8.0E0*DFNU + 4.0E0
+      P1 = -P1
+      M = N - IL + K
+      Y(M) = S2*AK1
+   50 CONTINUE
+      IF (N.LE.2) RETURN
+      NN = N
+      K = NN - 2
+      AK = FLOAT(K)
+      RZ = (CONE+CONE)/Z
+      IB = 3
+      DO 60 I=IB,NN
+      Y(K) = CMPLX(AK+FNU,0.0E0)*RZ*Y(K+1) + Y(K+2)
+      AK = AK - 1.0E0
+      K = K - 1
+   60 CONTINUE
+      IF (KODED.EQ.0) RETURN
+      CK = CEXP(CZ)
+      DO 70 I=1,NN
+      Y(I) = Y(I)*CK
+   70 CONTINUE
+      RETURN
+   80 CONTINUE
+      NZ = -1
+      RETURN
+   90 CONTINUE
+      NZ=-2
+      RETURN
+      END
diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cbesh.f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcruft/amos/cbesh.f	Sun Apr 27 22:34:17 2008 +0200
@@ -0,0 +1,331 @@
+      SUBROUTINE CBESH(Z, FNU, KODE, M, N, CY, NZ, IERR)
+C***BEGIN PROLOGUE CBESH
+C***DATE WRITTEN 830501 (YYMMDD)
+C***REVISION DATE 890801 (YYMMDD)
+C***CATEGORY NO.
B5K
+C***KEYWORDS H-BESSEL FUNCTIONS,BESSEL FUNCTIONS OF COMPLEX ARGUMENT,
+C            BESSEL FUNCTIONS OF THIRD KIND,HANKEL FUNCTIONS
+C***AUTHOR AMOS, DONALD E., SANDIA NATIONAL LABORATORIES
+C***PURPOSE TO COMPUTE THE H-BESSEL FUNCTIONS OF A COMPLEX ARGUMENT
+C***DESCRIPTION
+C
+C     ON KODE=1, CBESH COMPUTES AN N MEMBER SEQUENCE OF COMPLEX
+C     HANKEL (BESSEL) FUNCTIONS CY(J)=H(M,FNU+J-1,Z) FOR KINDS M=1
+C     OR 2, REAL, NONNEGATIVE ORDERS FNU+J-1, J=1,...,N, AND COMPLEX
+C     Z.NE.CMPLX(0.0E0,0.0E0) IN THE CUT PLANE -PI.LT.ARG(Z).LE.PI.
+C     ON KODE=2, CBESH COMPUTES THE SCALED HANKEL FUNCTIONS
+C
+C     CY(I)=H(M,FNU+J-1,Z)*EXP(-MM*Z*I) MM=3-2M, I**2=-1.
+C
+C     WHICH REMOVES THE EXPONENTIAL BEHAVIOR IN BOTH THE UPPER
+C     AND LOWER HALF PLANES. DEFINITIONS AND NOTATION ARE FOUND IN
+C     THE NBS HANDBOOK OF MATHEMATICAL FUNCTIONS (REF. 1).
+C
+C     INPUT
+C       Z      - Z=CMPLX(X,Y), Z.NE.CMPLX(0.,0.),-PI.LT.ARG(Z).LE.PI
+C       FNU    - ORDER OF INITIAL H FUNCTION, FNU.GE.0.0E0
+C       KODE   - A PARAMETER TO INDICATE THE SCALING OPTION
+C                KODE= 1 RETURNS
+C                        CY(J)=H(M,FNU+J-1,Z), J=1,...,N
+C                    = 2 RETURNS
+C                        CY(J)=H(M,FNU+J-1,Z)*EXP(-I*Z*(3-2M))
+C                        J=1,...,N , I**2=-1
+C       M      - KIND OF HANKEL FUNCTION, M=1 OR 2
+C       N      - NUMBER OF MEMBERS OF THE SEQUENCE, N.GE.1
+C
+C     OUTPUT
+C       CY     - A COMPLEX VECTOR WHOSE FIRST N COMPONENTS CONTAIN
+C                VALUES FOR THE SEQUENCE
+C                CY(J)=H(M,FNU+J-1,Z) OR
+C                CY(J)=H(M,FNU+J-1,Z)*EXP(-I*Z*(3-2M)) J=1,...,N
+C                DEPENDING ON KODE, I**2=-1.
+C       NZ     - NUMBER OF COMPONENTS SET TO ZERO DUE TO UNDERFLOW,
+C                NZ= 0 , NORMAL RETURN
+C                NZ.GT.0 , FIRST NZ COMPONENTS OF CY SET TO ZERO
+C                DUE TO UNDERFLOW, CY(J)=CMPLX(0.0,0.0)
+C                J=1,...,NZ WHEN Y.GT.0.0 AND M=1 OR
+C                Y.LT.0.0 AND M=2. FOR THE COMPLEMENTARY
+C                HALF PLANES, NZ STATES ONLY THE NUMBER
+C                OF UNDERFLOWS.
+C       IERR   -ERROR FLAG
+C                IERR=0, NORMAL RETURN - COMPUTATION COMPLETED
+C                IERR=1, INPUT ERROR - NO COMPUTATION
+C                IERR=2, OVERFLOW - NO COMPUTATION, FNU+N-1 TOO
+C                LARGE OR CABS(Z) TOO SMALL OR BOTH
+C                IERR=3, CABS(Z) OR FNU+N-1 LARGE - COMPUTATION DONE
+C                BUT LOSSES OF SIGNIFICANCE BY ARGUMENT
+C                REDUCTION PRODUCE LESS THAN HALF OF MACHINE
+C                ACCURACY
+C                IERR=4, CABS(Z) OR FNU+N-1 TOO LARGE - NO COMPUTA-
+C                TION BECAUSE OF COMPLETE LOSSES OF SIGNIFI-
+C                CANCE BY ARGUMENT REDUCTION
+C                IERR=5, ERROR - NO COMPUTATION,
+C                ALGORITHM TERMINATION CONDITION NOT MET
+C
+C***LONG DESCRIPTION
+C
+C     THE COMPUTATION IS CARRIED OUT BY THE RELATION
+C
+C     H(M,FNU,Z)=(1/MP)*EXP(-MP*FNU)*K(FNU,Z*EXP(-MP))
+C         MP=MM*HPI*I, MM=3-2*M, HPI=PI/2, I**2=-1
+C
+C     FOR M=1 OR 2 WHERE THE K BESSEL FUNCTION IS COMPUTED FOR THE
+C     RIGHT HALF PLANE RE(Z).GE.0.0. THE K FUNCTION IS CONTINUED
+C     TO THE LEFT HALF PLANE BY THE RELATION
+C
+C     K(FNU,Z*EXP(MP)) = EXP(-MP*FNU)*K(FNU,Z)-MP*I(FNU,Z)
+C     MP=MR*PI*I, MR=+1 OR -1, RE(Z).GT.0, I**2=-1
+C
+C     WHERE I(FNU,Z) IS THE I BESSEL FUNCTION.
+C
+C     EXPONENTIAL DECAY OF H(M,FNU,Z) OCCURS IN THE UPPER HALF Z
+C     PLANE FOR M=1 AND THE LOWER HALF Z PLANE FOR M=2. EXPONENTIAL
+C     GROWTH OCCURS IN THE COMPLEMENTARY HALF PLANES. SCALING
+C     BY EXP(-MM*Z*I) REMOVES THE EXPONENTIAL BEHAVIOR IN THE
+C     WHOLE Z PLANE FOR Z TO INFINITY.
+C
+C     FOR NEGATIVE ORDERS, THE FORMULAE
+C
+C     H(1,-FNU,Z) = H(1,FNU,Z)*CEXP( PI*FNU*I)
+C     H(2,-FNU,Z) = H(2,FNU,Z)*CEXP(-PI*FNU*I)
+C               I**2=-1
+C
+C     CAN BE USED.
+C
+C     IN MOST COMPLEX VARIABLE COMPUTATION, ONE MUST EVALUATE ELE-
+C     MENTARY FUNCTIONS. WHEN THE MAGNITUDE OF Z OR FNU+N-1 IS
+C     LARGE, LOSSES OF SIGNIFICANCE BY ARGUMENT REDUCTION OCCUR.
+C     CONSEQUENTLY, IF EITHER ONE EXCEEDS U1=SQRT(0.5/UR), THEN
+C     LOSSES EXCEEDING HALF PRECISION ARE LIKELY AND AN ERROR FLAG
+C     IERR=3 IS TRIGGERED WHERE UR=R1MACH(4)=UNIT ROUNDOFF. ALSO
+C     IF EITHER IS LARGER THAN U2=0.5/UR, THEN ALL SIGNIFICANCE IS
+C     LOST AND IERR=4. IN ORDER TO USE THE INT FUNCTION, ARGUMENTS
+C     MUST BE FURTHER RESTRICTED NOT TO EXCEED THE LARGEST MACHINE
+C     INTEGER, U3=I1MACH(9). THUS, THE MAGNITUDE OF Z AND FNU+N-1 IS
+C     RESTRICTED BY MIN(U2,U3). ON 32 BIT MACHINES, U1,U2, AND U3
+C     ARE APPROXIMATELY 2.0E+3, 4.2E+6, 2.1E+9 IN SINGLE PRECISION
+C     ARITHMETIC AND 1.3E+8, 1.8E+16, 2.1E+9 IN DOUBLE PRECISION
+C     ARITHMETIC RESPECTIVELY. THIS MAKES U2 AND U3 LIMITING IN
+C     THEIR RESPECTIVE ARITHMETICS. THIS MEANS THAT ONE CAN EXPECT
+C     TO RETAIN, IN THE WORST CASES ON 32 BIT MACHINES, NO DIGITS
+C     IN SINGLE AND ONLY 7 DIGITS IN DOUBLE PRECISION ARITHMETIC.
+C     SIMILAR CONSIDERATIONS HOLD FOR OTHER MACHINES.
+C
+C     THE APPROXIMATE RELATIVE ERROR IN THE MAGNITUDE OF A COMPLEX
+C     BESSEL FUNCTION CAN BE EXPRESSED BY P*10**S WHERE P=MAX(UNIT
+C     ROUNDOFF,1.0E-18) IS THE NOMINAL PRECISION AND 10**S REPRE-
+C     SENTS THE INCREASE IN ERROR DUE TO ARGUMENT REDUCTION IN THE
+C     ELEMENTARY FUNCTIONS. HERE, S=MAX(1,ABS(LOG10(CABS(Z))),
+C     ABS(LOG10(FNU))) APPROXIMATELY (I.E. S=MAX(1,ABS(EXPONENT OF
+C     CABS(Z),ABS(EXPONENT OF FNU)) ). HOWEVER, THE PHASE ANGLE MAY
+C     HAVE ONLY ABSOLUTE ACCURACY. THIS IS MOST LIKELY TO OCCUR WHEN
+C     ONE COMPONENT (IN ABSOLUTE VALUE) IS LARGER THAN THE OTHER BY
+C     SEVERAL ORDERS OF MAGNITUDE. IF ONE COMPONENT IS 10**K LARGER
+C     THAN THE OTHER, THEN ONE CAN EXPECT ONLY MAX(ABS(LOG10(P))-K,
+C     0) SIGNIFICANT DIGITS; OR, STATED ANOTHER WAY, WHEN K EXCEEDS
+C     THE EXPONENT OF P, NO SIGNIFICANT DIGITS REMAIN IN THE SMALLER
+C     COMPONENT. HOWEVER, THE PHASE ANGLE RETAINS ABSOLUTE ACCURACY
+C     BECAUSE, IN COMPLEX ARITHMETIC WITH PRECISION P, THE SMALLER
+C     COMPONENT WILL NOT (AS A RULE) DECREASE BELOW P TIMES THE
+C     MAGNITUDE OF THE LARGER COMPONENT. IN THESE EXTREME CASES,
+C     THE PRINCIPAL PHASE ANGLE IS ON THE ORDER OF +P, -P, PI/2-P,
+C     OR -PI/2+P.
+C
+C***REFERENCES HANDBOOK OF MATHEMATICAL FUNCTIONS BY M. ABRAMOWITZ
+C                 AND I. A. STEGUN, NBS AMS SERIES 55, U.S. DEPT. OF
+C                 COMMERCE, 1955.
+C
+C               COMPUTATION OF BESSEL FUNCTIONS OF COMPLEX ARGUMENT
+C                 BY D. E. AMOS, SAND83-0083, MAY, 1983.
+C
+C               COMPUTATION OF BESSEL FUNCTIONS OF COMPLEX ARGUMENT
+C                 AND LARGE ORDER BY D. E. AMOS, SAND83-0643, MAY, 1983
+C
+C               A SUBROUTINE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX
+C                 ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, SAND85-
+C                 1018, MAY, 1985
+C
+C               A PORTABLE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX
+C                 ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, TRANS.
+C                 MATH. SOFTWARE, 1986
+C
+C***ROUTINES CALLED CACON,CBKNU,CBUNK,CUOIK,I1MACH,R1MACH
+C***END PROLOGUE CBESH
+C
+      COMPLEX CY, Z, ZN, ZT, CSGN
+      REAL AA, ALIM, ALN, ARG, AZ, CPN, DIG, ELIM, FMM, FN, FNU, FNUL,
+     * HPI, RHPI, RL, R1M5, SGN, SPN, TOL, UFL, XN, XX, YN, YY, R1MACH,
+     * BB, ASCLE, RTOL, ATOL
+      INTEGER I, IERR, INU, INUH, IR, K, KODE, K1, K2, M,
+     * MM, MR, N, NN, NUF, NW, NZ, I1MACH
+      DIMENSION CY(N)
+C
+      DATA HPI /1.57079632679489662E0/
+C
+C***FIRST EXECUTABLE STATEMENT CBESH
+      NZ=0
+      XX = REAL(Z)
+      YY = AIMAG(Z)
+      IERR = 0
+      IF (XX.EQ.0.0E0 .AND. YY.EQ.0.0E0) IERR=1
+      IF (FNU.LT.0.0E0) IERR=1
+      IF (M.LT.1 .OR. M.GT.2) IERR=1
+      IF (KODE.LT.1 .OR. KODE.GT.2) IERR=1
+      IF (N.LT.1) IERR=1
+      IF (IERR.NE.0) RETURN
+      NN = N
+C-----------------------------------------------------------------------
+C     SET PARAMETERS RELATED TO MACHINE CONSTANTS.
+C     TOL IS THE APPROXIMATE UNIT ROUNDOFF LIMITED TO 1.0E-18.
+C     ELIM IS THE APPROXIMATE EXPONENTIAL OVER- AND UNDERFLOW LIMIT.
+C     EXP(-ELIM).LT.EXP(-ALIM)=EXP(-ELIM)/TOL AND
+C     EXP(ELIM).GT.EXP(ALIM)=EXP(ELIM)*TOL ARE INTERVALS NEAR
+C     UNDERFLOW AND OVERFLOW LIMITS WHERE SCALED ARITHMETIC IS DONE.
+C     RL IS THE LOWER BOUNDARY OF THE ASYMPTOTIC EXPANSION FOR LARGE Z.
+C     DIG = NUMBER OF BASE 10 DIGITS IN TOL = 10**(-DIG).
+C     FNUL IS THE LOWER BOUNDARY OF THE ASYMPTOTIC SERIES FOR LARGE FNU
+C-----------------------------------------------------------------------
+      TOL = AMAX1(R1MACH(4),1.0E-18)
+      K1 = I1MACH(12)
+      K2 = I1MACH(13)
+      R1M5 = R1MACH(5)
+      K = MIN0(IABS(K1),IABS(K2))
+      ELIM = 2.303E0*(FLOAT(K)*R1M5-3.0E0)
+      K1 = I1MACH(11) - 1
+      AA = R1M5*FLOAT(K1)
+      DIG = AMIN1(AA,18.0E0)
+      AA = AA*2.303E0
+      ALIM = ELIM + AMAX1(-AA,-41.45E0)
+      FNUL = 10.0E0 + 6.0E0*(DIG-3.0E0)
+      RL = 1.2E0*DIG + 3.0E0
+      FN = FNU + FLOAT(NN-1)
+      MM = 3 - M - M
+      FMM = FLOAT(MM)
+      ZN = Z*CMPLX(0.0E0,-FMM)
+      XN = REAL(ZN)
+      YN = AIMAG(ZN)
+      AZ = CABS(Z)
+C-----------------------------------------------------------------------
+C     TEST FOR RANGE
+C-----------------------------------------------------------------------
+      AA = 0.5E0/TOL
+      BB=FLOAT(I1MACH(9))*0.5E0
+      AA=AMIN1(AA,BB)
+      IF(AZ.GT.AA) GO TO 240
+      IF(FN.GT.AA) GO TO 240
+      AA=SQRT(AA)
+      IF(AZ.GT.AA) IERR=3
+      IF(FN.GT.AA) IERR=3
+C-----------------------------------------------------------------------
+C     OVERFLOW TEST ON THE LAST MEMBER OF THE SEQUENCE
+C-----------------------------------------------------------------------
+      UFL = R1MACH(1)*1.0E+3
+      IF (AZ.LT.UFL) GO TO 220
+      IF (FNU.GT.FNUL) GO TO 90
+      IF (FN.LE.1.0E0) GO TO 70
+      IF (FN.GT.2.0E0) GO TO 60
+      IF (AZ.GT.TOL) GO TO 70
+      ARG = 0.5E0*AZ
+      ALN = -FN*ALOG(ARG)
+      IF (ALN.GT.ELIM) GO TO 220
+      GO TO 70
+   60 CONTINUE
+      CALL CUOIK(ZN, FNU, KODE, 2, NN, CY, NUF, TOL, ELIM, ALIM)
+      IF (NUF.LT.0) GO TO 220
+      NZ = NZ + NUF
+      NN = NN - NUF
+C-----------------------------------------------------------------------
+C     HERE NN=N OR NN=0 SINCE NUF=0,NN, OR -1 ON RETURN FROM CUOIK
+C     IF NUF=NN, THEN CY(I)=CZERO FOR ALL I
+C-----------------------------------------------------------------------
+      IF (NN.EQ.0) GO TO 130
+   70 CONTINUE
+      IF ((XN.LT.0.0E0) .OR. (XN.EQ.0.0E0 .AND. YN.LT.0.0E0 .AND.
+     * M.EQ.2)) GO TO 80
+C-----------------------------------------------------------------------
+C     RIGHT HALF PLANE COMPUTATION, XN.GE.0. .AND. (XN.NE.0. .OR.
+C     YN.GE.0. .OR. M=1)
+C-----------------------------------------------------------------------
+      CALL CBKNU(ZN, FNU, KODE, NN, CY, NZ, TOL, ELIM, ALIM)
+      GO TO 110
+C-----------------------------------------------------------------------
+C     LEFT HALF PLANE COMPUTATION
+C-----------------------------------------------------------------------
+   80 CONTINUE
+      MR = -MM
+      CALL CACON(ZN, FNU, KODE, MR, NN, CY, NW, RL, FNUL, TOL, ELIM,
+     * ALIM)
+      IF (NW.LT.0) GO TO 230
+      NZ=NW
+      GO TO 110
+   90 CONTINUE
+C-----------------------------------------------------------------------
+C     UNIFORM ASYMPTOTIC EXPANSIONS FOR FNU.GT.FNUL
+C-----------------------------------------------------------------------
+      MR = 0
+      IF ((XN.GE.0.0E0) .AND. (XN.NE.0.0E0 .OR. YN.GE.0.0E0 .OR.
+     * M.NE.2)) GO TO 100
+      MR = -MM
+      IF (XN.EQ.0.0E0 .AND. YN.LT.0.0E0) ZN = -ZN
+  100 CONTINUE
+      CALL CBUNK(ZN, FNU, KODE, MR, NN, CY, NW, TOL, ELIM, ALIM)
+      IF (NW.LT.0) GO TO 230
+      NZ = NZ + NW
+  110 CONTINUE
+C-----------------------------------------------------------------------
+C     H(M,FNU,Z) = -FMM*(I/HPI)*(ZT**FNU)*K(FNU,-Z*ZT)
+C
+C     ZT=EXP(-FMM*HPI*I) = CMPLX(0.0,-FMM), FMM=3-2*M, M=1,2
+C-----------------------------------------------------------------------
+      SGN = SIGN(HPI,-FMM)
+C-----------------------------------------------------------------------
+C     CALCULATE EXP(FNU*HPI*I) TO MINIMIZE LOSSES OF SIGNIFICANCE
+C     WHEN FNU IS LARGE
+C-----------------------------------------------------------------------
+      INU = INT(FNU)
+      INUH = INU/2
+      IR = INU - 2*INUH
+      ARG = (FNU-FLOAT(INU-IR))*SGN
+      RHPI = 1.0E0/SGN
+      CPN = RHPI*COS(ARG)
+      SPN = RHPI*SIN(ARG)
+C     ZN = CMPLX(-SPN,CPN)
+      CSGN = CMPLX(-SPN,CPN)
+C     IF (MOD(INUH,2).EQ.1) ZN = -ZN
+      IF (MOD(INUH,2).EQ.1) CSGN = -CSGN
+      ZT = CMPLX(0.0E0,-FMM)
+      RTOL = 1.0E0/TOL
+      ASCLE = UFL*RTOL
+      DO 120 I=1,NN
+C       CY(I) = CY(I)*ZN
+C       ZN = ZN*ZT
+      ZN=CY(I)
+      AA=REAL(ZN)
+      BB=AIMAG(ZN)
+      ATOL=1.0E0
+      IF (AMAX1(ABS(AA),ABS(BB)).GT.ASCLE) GO TO 125
+      ZN = ZN*CMPLX(RTOL,0.0E0)
+      ATOL = TOL
+  125 CONTINUE
+      ZN = ZN*CSGN
+      CY(I) = ZN*CMPLX(ATOL,0.0E0)
+      CSGN = CSGN*ZT
+  120 CONTINUE
+      RETURN
+  130 CONTINUE
+      IF (XN.LT.0.0E0) GO TO 220
+      RETURN
+  220 CONTINUE
+      IERR=2
+      NZ=0
+      RETURN
+  230 CONTINUE
+      IF(NW.EQ.(-1)) GO TO 220
+      NZ=0
+      IERR=5
+      RETURN
+  240 CONTINUE
+      NZ=0
+      IERR=4
+      RETURN
+      END
diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cbesi.f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcruft/amos/cbesi.f	Sun Apr 27 22:34:17 2008 +0200
@@ -0,0 +1,258 @@
+      SUBROUTINE CBESI(Z, FNU, KODE, N, CY, NZ, IERR)
+C***BEGIN PROLOGUE CBESI
+C***DATE WRITTEN 830501 (YYMMDD)
+C***REVISION DATE 890801 (YYMMDD)
+C***CATEGORY NO. B5K
+C***KEYWORDS I-BESSEL FUNCTION,COMPLEX BESSEL FUNCTION,
+C            MODIFIED BESSEL FUNCTION OF THE FIRST KIND
+C***AUTHOR AMOS, DONALD E., SANDIA NATIONAL LABORATORIES
+C***PURPOSE TO COMPUTE I-BESSEL FUNCTIONS OF COMPLEX ARGUMENT
+C***DESCRIPTION
+C
+C     ON KODE=1, CBESI COMPUTES AN N MEMBER SEQUENCE OF COMPLEX
+C     BESSEL FUNCTIONS CY(J)=I(FNU+J-1,Z) FOR REAL, NONNEGATIVE
+C     ORDERS FNU+J-1, J=1,...,N AND COMPLEX Z IN THE CUT PLANE
+C     -PI.LT.ARG(Z).LE.PI. ON KODE=2, CBESI RETURNS THE SCALED
+C     FUNCTIONS
+C
+C     CY(J)=EXP(-ABS(X))*I(FNU+J-1,Z)   J = 1,...,N , X=REAL(Z)
+C
+C     WITH THE EXPONENTIAL GROWTH REMOVED IN BOTH THE LEFT AND
+C     RIGHT HALF PLANES FOR Z TO INFINITY.
DEFINITIONS AND
+C     NOTATION ARE FOUND IN THE NBS HANDBOOK OF MATHEMATICAL
+C     FUNCTIONS (REF.1)
+C
+C     INPUT
+C       Z      - Z=CMPLX(X,Y), -PI.LT.ARG(Z).LE.PI
+C       FNU    - ORDER OF INITIAL I FUNCTION, FNU.GE.0.0E0
+C       KODE   - A PARAMETER TO INDICATE THE SCALING OPTION
+C                KODE= 1 RETURNS
+C                        CY(J)=I(FNU+J-1,Z), J=1,...,N
+C                    = 2 RETURNS
+C                        CY(J)=I(FNU+J-1,Z)*EXP(-ABS(X)), J=1,...,N
+C       N      - NUMBER OF MEMBERS OF THE SEQUENCE, N.GE.1
+C
+C     OUTPUT
+C       CY     - A COMPLEX VECTOR WHOSE FIRST N COMPONENTS CONTAIN
+C                VALUES FOR THE SEQUENCE
+C                CY(J)=I(FNU+J-1,Z) OR
+C                CY(J)=I(FNU+J-1,Z)*EXP(-ABS(X)) J=1,...,N
+C                DEPENDING ON KODE, X=REAL(Z)
+C       NZ     - NUMBER OF COMPONENTS SET TO ZERO DUE TO UNDERFLOW,
+C                NZ= 0 , NORMAL RETURN
+C                NZ.GT.0 , LAST NZ COMPONENTS OF CY SET TO ZERO
+C                DUE TO UNDERFLOW, CY(J)=CMPLX(0.0,0.0),
+C                J = N-NZ+1,...,N
+C       IERR   - ERROR FLAG
+C                IERR=0, NORMAL RETURN - COMPUTATION COMPLETED
+C                IERR=1, INPUT ERROR - NO COMPUTATION
+C                IERR=2, OVERFLOW - NO COMPUTATION, REAL(Z) TOO
+C                LARGE ON KODE=1
+C                IERR=3, CABS(Z) OR FNU+N-1 LARGE - COMPUTATION DONE
+C                BUT LOSSES OF SIGNIFICANCE BY ARGUMENT
+C                REDUCTION PRODUCE LESS THAN HALF OF MACHINE
+C                ACCURACY
+C                IERR=4, CABS(Z) OR FNU+N-1 TOO LARGE - NO COMPUTA-
+C                TION BECAUSE OF COMPLETE LOSSES OF SIGNIFI-
+C                CANCE BY ARGUMENT REDUCTION
+C                IERR=5, ERROR - NO COMPUTATION,
+C                ALGORITHM TERMINATION CONDITION NOT MET
+C
+C***LONG DESCRIPTION
+C
+C     THE COMPUTATION IS CARRIED OUT BY THE POWER SERIES FOR
+C     SMALL CABS(Z), THE ASYMPTOTIC EXPANSION FOR LARGE CABS(Z),
+C     THE MILLER ALGORITHM NORMALIZED BY THE WRONSKIAN AND A
+C     NEUMANN SERIES FOR INTERMEDIATE MAGNITUDES, AND THE
+C     UNIFORM ASYMPTOTIC EXPANSIONS FOR I(FNU,Z) AND J(FNU,Z)
+C     FOR LARGE ORDERS. BACKWARD RECURRENCE IS USED TO GENERATE
+C     SEQUENCES OR REDUCE ORDERS WHEN NECESSARY.
+C
+C     THE CALCULATIONS ABOVE ARE DONE IN THE RIGHT HALF PLANE AND
+C     CONTINUED INTO THE LEFT HALF PLANE BY THE FORMULA
+C
+C     I(FNU,Z*EXP(M*PI)) = EXP(M*PI*FNU)*I(FNU,Z)  REAL(Z).GT.0.0
+C                   M = +I OR -I,  I**2=-1
+C
+C     FOR NEGATIVE ORDERS, THE FORMULA
+C
+C     I(-FNU,Z) = I(FNU,Z) + (2/PI)*SIN(PI*FNU)*K(FNU,Z)
+C
+C     CAN BE USED. HOWEVER, FOR LARGE ORDERS CLOSE TO INTEGERS, THE
+C     FUNCTION CHANGES RADICALLY. WHEN FNU IS A LARGE POSITIVE
+C     INTEGER, THE MAGNITUDE OF I(-FNU,Z)=I(FNU,Z) IS A LARGE
+C     NEGATIVE POWER OF TEN. BUT WHEN FNU IS NOT AN INTEGER,
+C     K(FNU,Z) DOMINATES IN MAGNITUDE WITH A LARGE POSITIVE POWER OF
+C     TEN AND THE MOST THAT THE SECOND TERM CAN BE REDUCED IS BY
+C     UNIT ROUNDOFF FROM THE COEFFICIENT. THUS, WIDE CHANGES CAN
+C     OCCUR WITHIN UNIT ROUNDOFF OF A LARGE INTEGER FOR FNU. HERE,
+C     LARGE MEANS FNU.GT.CABS(Z).
+C
+C     IN MOST COMPLEX VARIABLE COMPUTATION, ONE MUST EVALUATE ELE-
+C     MENTARY FUNCTIONS. WHEN THE MAGNITUDE OF Z OR FNU+N-1 IS
+C     LARGE, LOSSES OF SIGNIFICANCE BY ARGUMENT REDUCTION OCCUR.
+C     CONSEQUENTLY, IF EITHER ONE EXCEEDS U1=SQRT(0.5/UR), THEN
+C     LOSSES EXCEEDING HALF PRECISION ARE LIKELY AND AN ERROR FLAG
+C     IERR=3 IS TRIGGERED WHERE UR=R1MACH(4)=UNIT ROUNDOFF. ALSO
+C     IF EITHER IS LARGER THAN U2=0.5/UR, THEN ALL SIGNIFICANCE IS
+C     LOST AND IERR=4. IN ORDER TO USE THE INT FUNCTION, ARGUMENTS
+C     MUST BE FURTHER RESTRICTED NOT TO EXCEED THE LARGEST MACHINE
+C     INTEGER, U3=I1MACH(9). THUS, THE MAGNITUDE OF Z AND FNU+N-1 IS
+C     RESTRICTED BY MIN(U2,U3). ON 32 BIT MACHINES, U1,U2, AND U3
+C     ARE APPROXIMATELY 2.0E+3, 4.2E+6, 2.1E+9 IN SINGLE PRECISION
+C     ARITHMETIC AND 1.3E+8, 1.8E+16, 2.1E+9 IN DOUBLE PRECISION
+C     ARITHMETIC RESPECTIVELY. THIS MAKES U2 AND U3 LIMITING IN
+C     THEIR RESPECTIVE ARITHMETICS. THIS MEANS THAT ONE CAN EXPECT
+C     TO RETAIN, IN THE WORST CASES ON 32 BIT MACHINES, NO DIGITS
+C     IN SINGLE AND ONLY 7 DIGITS IN DOUBLE PRECISION ARITHMETIC.
+C     SIMILAR CONSIDERATIONS HOLD FOR OTHER MACHINES.
+C
+C     THE APPROXIMATE RELATIVE ERROR IN THE MAGNITUDE OF A COMPLEX
+C     BESSEL FUNCTION CAN BE EXPRESSED BY P*10**S WHERE P=MAX(UNIT
+C     ROUNDOFF,1.0E-18) IS THE NOMINAL PRECISION AND 10**S REPRE-
+C     SENTS THE INCREASE IN ERROR DUE TO ARGUMENT REDUCTION IN THE
+C     ELEMENTARY FUNCTIONS. HERE, S=MAX(1,ABS(LOG10(CABS(Z))),
+C     ABS(LOG10(FNU))) APPROXIMATELY (I.E. S=MAX(1,ABS(EXPONENT OF
+C     CABS(Z),ABS(EXPONENT OF FNU)) ). HOWEVER, THE PHASE ANGLE MAY
+C     HAVE ONLY ABSOLUTE ACCURACY. THIS IS MOST LIKELY TO OCCUR WHEN
+C     ONE COMPONENT (IN ABSOLUTE VALUE) IS LARGER THAN THE OTHER BY
+C     SEVERAL ORDERS OF MAGNITUDE. IF ONE COMPONENT IS 10**K LARGER
+C     THAN THE OTHER, THEN ONE CAN EXPECT ONLY MAX(ABS(LOG10(P))-K,
+C     0) SIGNIFICANT DIGITS; OR, STATED ANOTHER WAY, WHEN K EXCEEDS
+C     THE EXPONENT OF P, NO SIGNIFICANT DIGITS REMAIN IN THE SMALLER
+C     COMPONENT. HOWEVER, THE PHASE ANGLE RETAINS ABSOLUTE ACCURACY
+C     BECAUSE, IN COMPLEX ARITHMETIC WITH PRECISION P, THE SMALLER
+C     COMPONENT WILL NOT (AS A RULE) DECREASE BELOW P TIMES THE
+C     MAGNITUDE OF THE LARGER COMPONENT. IN THESE EXTREME CASES,
+C     THE PRINCIPAL PHASE ANGLE IS ON THE ORDER OF +P, -P, PI/2-P,
+C     OR -PI/2+P.
+C
+C***REFERENCES HANDBOOK OF MATHEMATICAL FUNCTIONS BY M. ABRAMOWITZ
+C                 AND I. A. STEGUN, NBS AMS SERIES 55, U.S. DEPT. OF
+C                 COMMERCE, 1955.
+C
+C               COMPUTATION OF BESSEL FUNCTIONS OF COMPLEX ARGUMENT
+C                 BY D. E. AMOS, SAND83-0083, MAY, 1983.
+C
+C               COMPUTATION OF BESSEL FUNCTIONS OF COMPLEX ARGUMENT
+C                 AND LARGE ORDER BY D. E. AMOS, SAND83-0643, MAY, 1983
+C
+C               A SUBROUTINE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX
+C                 ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, SAND85-
+C                 1018, MAY, 1985
+C
+C               A PORTABLE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX
+C                 ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, TRANS.
+C                 MATH. SOFTWARE, 1986
+C
+C***ROUTINES CALLED CBINU,I1MACH,R1MACH
+C***END PROLOGUE CBESI
+      COMPLEX CONE, CSGN, CY, Z, ZN
+      REAL AA, ALIM, ARG, DIG, ELIM, FNU, FNUL, PI, RL, R1M5, S1, S2,
+     * TOL, XX, YY, R1MACH, AZ, FN, BB, ASCLE, RTOL, ATOL
+      INTEGER I, IERR, INU, K, KODE, K1, K2, N, NN, NZ, I1MACH
+      DIMENSION CY(N)
+      DATA PI /3.14159265358979324E0/
+      DATA CONE / (1.0E0,0.0E0) /
+C
+C***FIRST EXECUTABLE STATEMENT CBESI
+      IERR = 0
+      NZ=0
+      IF (FNU.LT.0.0E0) IERR=1
+      IF (KODE.LT.1 .OR. KODE.GT.2) IERR=1
+      IF (N.LT.1) IERR=1
+      IF (IERR.NE.0) RETURN
+      XX = REAL(Z)
+      YY = AIMAG(Z)
+C-----------------------------------------------------------------------
+C     SET PARAMETERS RELATED TO MACHINE CONSTANTS.
+C     TOL IS THE APPROXIMATE UNIT ROUNDOFF LIMITED TO 1.0E-18.
+C     ELIM IS THE APPROXIMATE EXPONENTIAL OVER- AND UNDERFLOW LIMIT.
+C     EXP(-ELIM).LT.EXP(-ALIM)=EXP(-ELIM)/TOL AND
+C     EXP(ELIM).GT.EXP(ALIM)=EXP(ELIM)*TOL ARE INTERVALS NEAR
+C     UNDERFLOW AND OVERFLOW LIMITS WHERE SCALED ARITHMETIC IS DONE.
+C     RL IS THE LOWER BOUNDARY OF THE ASYMPTOTIC EXPANSION FOR LARGE Z.
+C     DIG = NUMBER OF BASE 10 DIGITS IN TOL = 10**(-DIG).
+C     FNUL IS THE LOWER BOUNDARY OF THE ASYMPTOTIC SERIES FOR LARGE FNU.
+C-----------------------------------------------------------------------
+      TOL = AMAX1(R1MACH(4),1.0E-18)
+      K1 = I1MACH(12)
+      K2 = I1MACH(13)
+      R1M5 = R1MACH(5)
+      K = MIN0(IABS(K1),IABS(K2))
+      ELIM = 2.303E0*(FLOAT(K)*R1M5-3.0E0)
+      K1 = I1MACH(11) - 1
+      AA = R1M5*FLOAT(K1)
+      DIG = AMIN1(AA,18.0E0)
+      AA = AA*2.303E0
+      ALIM = ELIM + AMAX1(-AA,-41.45E0)
+      RL = 1.2E0*DIG + 3.0E0
+      FNUL = 10.0E0 + 6.0E0*(DIG-3.0E0)
+      AZ = CABS(Z)
+C-----------------------------------------------------------------------
+C     TEST FOR RANGE
+C-----------------------------------------------------------------------
+      AA = 0.5E0/TOL
+      BB=FLOAT(I1MACH(9))*0.5E0
+      AA=AMIN1(AA,BB)
+      IF(AZ.GT.AA) GO TO 140
+      FN=FNU+FLOAT(N-1)
+      IF(FN.GT.AA) GO TO 140
+      AA=SQRT(AA)
+      IF(AZ.GT.AA) IERR=3
+      IF(FN.GT.AA) IERR=3
+      ZN = Z
+      CSGN = CONE
+      IF (XX.GE.0.0E0) GO TO 40
+      ZN = -Z
+C-----------------------------------------------------------------------
+C     CALCULATE CSGN=EXP(FNU*PI*I) TO MINIMIZE LOSSES OF SIGNIFICANCE
+C     WHEN FNU IS LARGE
+C-----------------------------------------------------------------------
+      INU = INT(FNU)
+      ARG = (FNU-FLOAT(INU))*PI
+      IF (YY.LT.0.0E0) ARG = -ARG
+      S1 = COS(ARG)
+      S2 = SIN(ARG)
+      CSGN = CMPLX(S1,S2)
+      IF (MOD(INU,2).EQ.1) CSGN = -CSGN
+   40 CONTINUE
+      CALL CBINU(ZN, FNU, KODE, N, CY, NZ, RL, FNUL, TOL, ELIM, ALIM)
+      IF (NZ.LT.0) GO TO 120
+      IF (XX.GE.0.0E0) RETURN
+C-----------------------------------------------------------------------
+C     ANALYTIC CONTINUATION TO THE LEFT HALF PLANE
+C-----------------------------------------------------------------------
+      NN = N - NZ
+      IF (NN.EQ.0) RETURN
+      RTOL = 1.0E0/TOL
+      ASCLE = R1MACH(1)*RTOL*1.0E+3
+      DO 50 I=1,NN
+C       CY(I) = CY(I)*CSGN
+      ZN=CY(I)
+      AA=REAL(ZN)
+      BB=AIMAG(ZN)
+      ATOL=1.0E0
+      IF (AMAX1(ABS(AA),ABS(BB)).GT.ASCLE) GO TO 55
+      ZN = ZN*CMPLX(RTOL,0.0E0)
+      ATOL = TOL
+   55 CONTINUE
+      ZN = ZN*CSGN
+      CY(I) = ZN*CMPLX(ATOL,0.0E0)
+      CSGN = -CSGN
+   50 CONTINUE
+      RETURN
+  120 CONTINUE
+      IF(NZ.EQ.(-2)) GO TO 130
+      NZ = 0
+      IERR=2
+      RETURN
+  130 CONTINUE
+      NZ=0
+      IERR=5
+      RETURN
+  140 CONTINUE
+      NZ=0
+      IERR=4
+      RETURN
+      END
diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cbesj.f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcruft/amos/cbesj.f	Sun Apr 27 22:34:17 2008 +0200
@@ -0,0 +1,253 @@
+      SUBROUTINE CBESJ(Z, FNU, KODE, N, CY, NZ, IERR)
+C***BEGIN PROLOGUE CBESJ
+C***DATE WRITTEN 830501 (YYMMDD)
+C***REVISION DATE 890801 (YYMMDD)
+C***CATEGORY NO. B5K
+C***KEYWORDS J-BESSEL FUNCTION,BESSEL FUNCTION OF COMPLEX ARGUMENT,
+C            BESSEL FUNCTION OF FIRST KIND
+C***AUTHOR AMOS, DONALD E., SANDIA NATIONAL LABORATORIES
+C***PURPOSE TO COMPUTE THE J-BESSEL FUNCTION OF A COMPLEX ARGUMENT
+C***DESCRIPTION
+C
+C     ON KODE=1, CBESJ COMPUTES AN N MEMBER SEQUENCE OF COMPLEX
+C     BESSEL FUNCTIONS CY(I)=J(FNU+I-1,Z) FOR REAL, NONNEGATIVE
+C     ORDERS FNU+I-1, I=1,...,N AND COMPLEX Z IN THE CUT PLANE
+C     -PI.LT.ARG(Z).LE.PI. ON KODE=2, CBESJ RETURNS THE SCALED
+C     FUNCTIONS
+C
+C     CY(I)=EXP(-ABS(Y))*J(FNU+I-1,Z)   I = 1,...,N , Y=AIMAG(Z)
+C
+C     WHICH REMOVE THE EXPONENTIAL GROWTH IN BOTH THE UPPER AND
+C     LOWER HALF PLANES FOR Z TO INFINITY. DEFINITIONS AND NOTATION
+C     ARE FOUND IN THE NBS HANDBOOK OF MATHEMATICAL FUNCTIONS
+C     (REF. 1).
+C
+C     INPUT
+C       Z      - Z=CMPLX(X,Y), -PI.LT.ARG(Z).LE.PI
+C       FNU    - ORDER OF INITIAL J FUNCTION, FNU.GE.0.0E0
+C       KODE   - A PARAMETER TO INDICATE THE SCALING OPTION
+C                KODE= 1 RETURNS
+C                        CY(I)=J(FNU+I-1,Z), I=1,...,N
+C                    = 2 RETURNS
+C                        CY(I)=J(FNU+I-1,Z)*EXP(-ABS(Y)), I=1,...
+C       N      - NUMBER OF MEMBERS OF THE SEQUENCE, N.GE.1
+C
+C     OUTPUT
+C       CY     - A COMPLEX VECTOR WHOSE FIRST N COMPONENTS CONTAIN
+C                VALUES FOR THE SEQUENCE
+C                CY(I)=J(FNU+I-1,Z) OR
+C                CY(I)=J(FNU+I-1,Z)*EXP(-ABS(Y)) I=1,...,N
+C                DEPENDING ON KODE, Y=AIMAG(Z).
+C       NZ     - NUMBER OF COMPONENTS SET TO ZERO DUE TO UNDERFLOW,
+C                NZ= 0 , NORMAL RETURN
+C                NZ.GT.0 , LAST NZ COMPONENTS OF CY SET TO ZERO
+C                DUE TO UNDERFLOW, CY(I)=CMPLX(0.0,0.0),
+C                I = N-NZ+1,...,N
+C       IERR   - ERROR FLAG
+C                IERR=0, NORMAL RETURN - COMPUTATION COMPLETED
+C                IERR=1, INPUT ERROR - NO COMPUTATION
+C                IERR=2, OVERFLOW - NO COMPUTATION, AIMAG(Z)
+C                TOO LARGE ON KODE=1
+C                IERR=3, CABS(Z) OR FNU+N-1 LARGE - COMPUTATION DONE
+C                BUT LOSSES OF SIGNIFICANCE BY ARGUMENT
+C                REDUCTION PRODUCE LESS THAN HALF OF MACHINE
+C                ACCURACY
+C                IERR=4, CABS(Z) OR FNU+N-1 TOO LARGE - NO COMPUTA-
+C                TION BECAUSE OF COMPLETE LOSSES OF SIGNIFI-
+C                CANCE BY ARGUMENT REDUCTION
+C                IERR=5, ERROR - NO COMPUTATION,
+C                ALGORITHM TERMINATION CONDITION NOT MET
+C
+C***LONG DESCRIPTION
+C
+C     THE COMPUTATION IS CARRIED OUT BY THE FORMULA
+C
+C     J(FNU,Z)=EXP( FNU*PI*I/2)*I(FNU,-I*Z)    AIMAG(Z).GE.0.0
+C
+C     J(FNU,Z)=EXP(-FNU*PI*I/2)*I(FNU, I*Z)    AIMAG(Z).LT.0.0
+C
+C     WHERE I**2 = -1 AND I(FNU,Z) IS THE I BESSEL FUNCTION.
+C
+C     FOR NEGATIVE ORDERS, THE FORMULA
+C
+C     J(-FNU,Z) = J(FNU,Z)*COS(PI*FNU) - Y(FNU,Z)*SIN(PI*FNU)
+C
+C     CAN BE USED. HOWEVER, FOR LARGE ORDERS CLOSE TO INTEGERS, THE
+C     FUNCTION CHANGES RADICALLY. WHEN FNU IS A LARGE POSITIVE
+C     INTEGER, THE MAGNITUDE OF J(-FNU,Z)=J(FNU,Z)*COS(PI*FNU) IS A
+C     LARGE NEGATIVE POWER OF TEN. BUT WHEN FNU IS NOT AN INTEGER,
+C     Y(FNU,Z) DOMINATES IN MAGNITUDE WITH A LARGE POSITIVE POWER OF
+C     TEN AND THE MOST THAT THE SECOND TERM CAN BE REDUCED IS BY
+C     UNIT ROUNDOFF FROM THE COEFFICIENT. THUS, WIDE CHANGES CAN
+C     OCCUR WITHIN UNIT ROUNDOFF OF A LARGE INTEGER FOR FNU. HERE,
+C     LARGE MEANS FNU.GT.CABS(Z).
+C
+C     IN MOST COMPLEX VARIABLE COMPUTATION, ONE MUST EVALUATE ELE-
+C     MENTARY FUNCTIONS. WHEN THE MAGNITUDE OF Z OR FNU+N-1 IS
+C     LARGE, LOSSES OF SIGNIFICANCE BY ARGUMENT REDUCTION OCCUR.
+C CONSEQUENTLY, IF EITHER ONE EXCEEDS U1=SQRT(0.5/UR), THEN +C LOSSES EXCEEDING HALF PRECISION ARE LIKELY AND AN ERROR FLAG +C IERR=3 IS TRIGGERED WHERE UR=R1MACH(4)=UNIT ROUNDOFF. ALSO +C IF EITHER IS LARGER THAN U2=0.5/UR, THEN ALL SIGNIFICANCE IS +C LOST AND IERR=4. IN ORDER TO USE THE INT FUNCTION, ARGUMENTS +C MUST BE FURTHER RESTRICTED NOT TO EXCEED THE LARGEST MACHINE +C INTEGER, U3=I1MACH(9). THUS, THE MAGNITUDE OF Z AND FNU+N-1 IS +C RESTRICTED BY MIN(U2,U3). ON 32 BIT MACHINES, U1,U2, AND U3 +C ARE APPROXIMATELY 2.0E+3, 4.2E+6, 2.1E+9 IN SINGLE PRECISION +C ARITHMETIC AND 1.3E+8, 1.8E+16, 2.1E+9 IN DOUBLE PRECISION +C ARITHMETIC RESPECTIVELY. THIS MAKES U2 AND U3 LIMITING IN +C THEIR RESPECTIVE ARITHMETICS. THIS MEANS THAT ONE CAN EXPECT +C TO RETAIN, IN THE WORST CASES ON 32 BIT MACHINES, NO DIGITS +C IN SINGLE AND ONLY 7 DIGITS IN DOUBLE PRECISION ARITHMETIC. +C SIMILAR CONSIDERATIONS HOLD FOR OTHER MACHINES. +C +C THE APPROXIMATE RELATIVE ERROR IN THE MAGNITUDE OF A COMPLEX +C BESSEL FUNCTION CAN BE EXPRESSED BY P*10**S WHERE P=MAX(UNIT +C ROUNDOFF,1.0E-18) IS THE NOMINAL PRECISION AND 10**S REPRE- +C SENTS THE INCREASE IN ERROR DUE TO ARGUMENT REDUCTION IN THE +C ELEMENTARY FUNCTIONS. HERE, S=MAX(1,ABS(LOG10(CABS(Z))), +C ABS(LOG10(FNU))) APPROXIMATELY (I.E. S=MAX(1,ABS(EXPONENT OF +C CABS(Z),ABS(EXPONENT OF FNU)) ). HOWEVER, THE PHASE ANGLE MAY +C HAVE ONLY ABSOLUTE ACCURACY. THIS IS MOST LIKELY TO OCCUR WHEN +C ONE COMPONENT (IN ABSOLUTE VALUE) IS LARGER THAN THE OTHER BY +C SEVERAL ORDERS OF MAGNITUDE. IF ONE COMPONENT IS 10**K LARGER +C THAN THE OTHER, THEN ONE CAN EXPECT ONLY MAX(ABS(LOG10(P))-K, +C 0) SIGNIFICANT DIGITS; OR, STATED ANOTHER WAY, WHEN K EXCEEDS +C THE EXPONENT OF P, NO SIGNIFICANT DIGITS REMAIN IN THE SMALLER +C COMPONENT. 
HOWEVER, THE PHASE ANGLE RETAINS ABSOLUTE ACCURACY +C BECAUSE, IN COMPLEX ARITHMETIC WITH PRECISION P, THE SMALLER +C COMPONENT WILL NOT (AS A RULE) DECREASE BELOW P TIMES THE +C MAGNITUDE OF THE LARGER COMPONENT. IN THESE EXTREME CASES, +C THE PRINCIPAL PHASE ANGLE IS ON THE ORDER OF +P, -P, PI/2-P, +C OR -PI/2+P. +C +C***REFERENCES HANDBOOK OF MATHEMATICAL FUNCTIONS BY M. ABRAMOWITZ +C AND I. A. STEGUN, NBS AMS SERIES 55, U.S. DEPT. OF +C COMMERCE, 1955. +C +C COMPUTATION OF BESSEL FUNCTIONS OF COMPLEX ARGUMENT +C BY D. E. AMOS, SAND83-0083, MAY, 1983. +C +C COMPUTATION OF BESSEL FUNCTIONS OF COMPLEX ARGUMENT +C AND LARGE ORDER BY D. E. AMOS, SAND83-0643, MAY, 1983 +C +C A SUBROUTINE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX +C ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, SAND85- +C 1018, MAY, 1985 +C +C A PORTABLE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX +C ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, TRANS. +C MATH. SOFTWARE, 1986 +C +C***ROUTINES CALLED CBINU,I1MACH,R1MACH +C***END PROLOGUE CBESJ +C + COMPLEX CI, CSGN, CY, Z, ZN + REAL AA, ALIM, ARG, DIG, ELIM, FNU, FNUL, HPI, RL, R1, R1M5, R2, + * TOL, YY, R1MACH, AZ, FN, BB, ASCLE, RTOL, ATOL + INTEGER I, IERR, INU, INUH, IR, KODE, K1, K2, N, NL, NZ, I1MACH, K + DIMENSION CY(N) + DATA HPI /1.57079632679489662E0/ +C +C***FIRST EXECUTABLE STATEMENT CBESJ + IERR = 0 + NZ=0 + IF (FNU.LT.0.0E0) IERR=1 + IF (KODE.LT.1 .OR. KODE.GT.2) IERR=1 + IF (N.LT.1) IERR=1 + IF (IERR.NE.0) RETURN +C----------------------------------------------------------------------- +C SET PARAMETERS RELATED TO MACHINE CONSTANTS. +C TOL IS THE APPROXIMATE UNIT ROUNDOFF LIMITED TO 1.0E-18. +C ELIM IS THE APPROXIMATE EXPONENTIAL OVER- AND UNDERFLOW LIMIT. +C EXP(-ELIM).LT.EXP(-ALIM)=EXP(-ELIM)/TOL AND +C EXP(ELIM).GT.EXP(ALIM)=EXP(ELIM)*TOL ARE INTERVALS NEAR +C UNDERFLOW AND OVERFLOW LIMITS WHERE SCALED ARITHMETIC IS DONE. +C RL IS THE LOWER BOUNDARY OF THE ASYMPTOTIC EXPANSION FOR LARGE Z. 
+C DIG = NUMBER OF BASE 10 DIGITS IN TOL = 10**(-DIG). +C FNUL IS THE LOWER BOUNDARY OF THE ASYMPTOTIC SERIES FOR LARGE FNU. +C----------------------------------------------------------------------- + TOL = AMAX1(R1MACH(4),1.0E-18) + K1 = I1MACH(12) + K2 = I1MACH(13) + R1M5 = R1MACH(5) + K = MIN0(IABS(K1),IABS(K2)) + ELIM = 2.303E0*(FLOAT(K)*R1M5-3.0E0) + K1 = I1MACH(11) - 1 + AA = R1M5*FLOAT(K1) + DIG = AMIN1(AA,18.0E0) + AA = AA*2.303E0 + ALIM = ELIM + AMAX1(-AA,-41.45E0) + RL = 1.2E0*DIG + 3.0E0 + FNUL = 10.0E0 + 6.0E0*(DIG-3.0E0) + CI = CMPLX(0.0E0,1.0E0) + YY = AIMAG(Z) + AZ = CABS(Z) +C----------------------------------------------------------------------- +C TEST FOR RANGE +C----------------------------------------------------------------------- + AA = 0.5E0/TOL + BB=FLOAT(I1MACH(9))*0.5E0 + AA=AMIN1(AA,BB) + FN=FNU+FLOAT(N-1) + IF(AZ.GT.AA) GO TO 140 + IF(FN.GT.AA) GO TO 140 + AA=SQRT(AA) + IF(AZ.GT.AA) IERR=3 + IF(FN.GT.AA) IERR=3 +C----------------------------------------------------------------------- +C CALCULATE CSGN=EXP(FNU*HPI*I) TO MINIMIZE LOSSES OF SIGNIFICANCE +C WHEN FNU IS LARGE +C----------------------------------------------------------------------- + INU = INT(FNU) + INUH = INU/2 + IR = INU - 2*INUH + ARG = (FNU-FLOAT(INU-IR))*HPI + R1 = COS(ARG) + R2 = SIN(ARG) + CSGN = CMPLX(R1,R2) + IF (MOD(INUH,2).EQ.1) CSGN = -CSGN +C----------------------------------------------------------------------- +C ZN IS IN THE RIGHT HALF PLANE +C----------------------------------------------------------------------- + ZN = -Z*CI + IF (YY.GE.0.0E0) GO TO 40 + ZN = -ZN + CSGN = CONJG(CSGN) + CI = CONJG(CI) + 40 CONTINUE + CALL CBINU(ZN, FNU, KODE, N, CY, NZ, RL, FNUL, TOL, ELIM, ALIM) + IF (NZ.LT.0) GO TO 120 + NL = N - NZ + IF (NL.EQ.0) RETURN + RTOL = 1.0E0/TOL + ASCLE = R1MACH(1)*RTOL*1.0E+3 + DO 50 I=1,NL +C CY(I)=CY(I)*CSGN + ZN=CY(I) + AA=REAL(ZN) + BB=AIMAG(ZN) + ATOL=1.0E0 + IF (AMAX1(ABS(AA),ABS(BB)).GT.ASCLE) GO TO 55 + ZN = ZN*CMPLX(RTOL,0.0E0) + ATOL 
= TOL + 55 CONTINUE + ZN = ZN*CSGN + CY(I) = ZN*CMPLX(ATOL,0.0E0) + CSGN = CSGN*CI + 50 CONTINUE + RETURN + 120 CONTINUE + IF(NZ.EQ.(-2)) GO TO 130 + NZ = 0 + IERR = 2 + RETURN + 130 CONTINUE + NZ=0 + IERR=5 + RETURN + 140 CONTINUE + NZ=0 + IERR=4 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cbesk.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cbesk.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,276 @@ + SUBROUTINE CBESK(Z, FNU, KODE, N, CY, NZ, IERR) +C***BEGIN PROLOGUE CBESK +C***DATE WRITTEN 830501 (YYMMDD) +C***REVISION DATE 890801 (YYMMDD) +C***CATEGORY NO. B5K +C***KEYWORDS K-BESSEL FUNCTION,COMPLEX BESSEL FUNCTION, +C MODIFIED BESSEL FUNCTION OF THE SECOND KIND, +C BESSEL FUNCTION OF THE THIRD KIND +C***AUTHOR AMOS, DONALD E., SANDIA NATIONAL LABORATORIES +C***PURPOSE TO COMPUTE K-BESSEL FUNCTIONS OF COMPLEX ARGUMENT +C***DESCRIPTION +C +C ON KODE=1, CBESK COMPUTES AN N MEMBER SEQUENCE OF COMPLEX +C BESSEL FUNCTIONS CY(J)=K(FNU+J-1,Z) FOR REAL, NONNEGATIVE +C ORDERS FNU+J-1, J=1,...,N AND COMPLEX Z.NE.CMPLX(0.0,0.0) +C IN THE CUT PLANE -PI.LT.ARG(Z).LE.PI. ON KODE=2, CBESK +C RETURNS THE SCALED K FUNCTIONS, +C +C CY(J)=EXP(Z)*K(FNU+J-1,Z) , J=1,...,N, +C +C WHICH REMOVE THE EXPONENTIAL BEHAVIOR IN BOTH THE LEFT AND +C RIGHT HALF PLANES FOR Z TO INFINITY. DEFINITIONS AND +C NOTATION ARE FOUND IN THE NBS HANDBOOK OF MATHEMATICAL +C FUNCTIONS (REF. 1). 
+C +C         INPUT +C           Z      - Z=CMPLX(X,Y),Z.NE.CMPLX(0.,0.),-PI.LT.ARG(Z).LE.PI +C           FNU    - ORDER OF INITIAL K FUNCTION, FNU.GE.0.0E0 +C           N      - NUMBER OF MEMBERS OF THE SEQUENCE, N.GE.1 +C           KODE   - A PARAMETER TO INDICATE THE SCALING OPTION +C                    KODE= 1  RETURNS +C                             CY(I)=K(FNU+I-1,Z), I=1,...,N +C                        = 2  RETURNS +C                             CY(I)=K(FNU+I-1,Z)*EXP(Z), I=1,...,N +C +C         OUTPUT +C           CY     - A COMPLEX VECTOR WHOSE FIRST N COMPONENTS CONTAIN +C                    VALUES FOR THE SEQUENCE +C                    CY(I)=K(FNU+I-1,Z), I=1,...,N OR +C                    CY(I)=K(FNU+I-1,Z)*EXP(Z), I=1,...,N +C                    DEPENDING ON KODE +C           NZ     - NUMBER OF COMPONENTS SET TO ZERO DUE TO UNDERFLOW. +C                    NZ= 0   , NORMAL RETURN +C                    NZ.GT.0 , FIRST NZ COMPONENTS OF CY SET TO ZERO +C                              DUE TO UNDERFLOW, CY(I)=CMPLX(0.0,0.0), +C                              I=1,...,N WHEN X.GE.0.0. WHEN X.LT.0.0 +C                              NZ STATES ONLY THE NUMBER OF UNDERFLOWS +C                              IN THE SEQUENCE. +C           IERR   - ERROR FLAG +C                    IERR=0, NORMAL RETURN - COMPUTATION COMPLETED +C                    IERR=1, INPUT ERROR   - NO COMPUTATION +C                    IERR=2, OVERFLOW      - NO COMPUTATION, FNU+N-1 IS +C                            TOO LARGE OR CABS(Z) IS TOO SMALL OR BOTH +C                    IERR=3, CABS(Z) OR FNU+N-1 LARGE - COMPUTATION DONE +C                            BUT LOSSES OF SIGNIFICANCE BY ARGUMENT +C                            REDUCTION PRODUCE LESS THAN HALF OF MACHINE +C                            ACCURACY +C                    IERR=4, CABS(Z) OR FNU+N-1 TOO LARGE - NO COMPUTA- +C                            TION BECAUSE OF COMPLETE LOSSES OF SIGNIFI- +C                            CANCE BY ARGUMENT REDUCTION +C                    IERR=5, ERROR              - NO COMPUTATION, +C                            ALGORITHM TERMINATION CONDITION NOT MET +C +C***LONG DESCRIPTION +C +C         EQUATIONS OF THE REFERENCE ARE IMPLEMENTED FOR SMALL ORDERS +C         DNU AND DNU+1.0 IN THE RIGHT HALF PLANE X.GE.0.0. FORWARD +C         RECURRENCE GENERATES HIGHER ORDERS. K IS CONTINUED TO THE LEFT +C         HALF PLANE BY THE RELATION +C +C         K(FNU,Z*EXP(MP)) = EXP(-MP*FNU)*K(FNU,Z)-MP*I(FNU,Z) +C         MP=MR*PI*I, MR=+1 OR -1, RE(Z).GT.0, I**2=-1 +C +C         WHERE I(FNU,Z) IS THE I BESSEL FUNCTION. +C +C         FOR LARGE ORDERS, FNU.GT.FNUL, THE K FUNCTION IS COMPUTED +C         BY MEANS OF ITS UNIFORM ASYMPTOTIC EXPANSIONS. +C +C         FOR NEGATIVE ORDERS, THE FORMULA +C +C                       K(-FNU,Z) = K(FNU,Z) +C +C         CAN BE USED.
+C +C CBESK ASSUMES THAT A SIGNIFICANT DIGIT SINH(X) FUNCTION IS +C AVAILABLE. +C +C IN MOST COMPLEX VARIABLE COMPUTATION, ONE MUST EVALUATE ELE- +C MENTARY FUNCTIONS. WHEN THE MAGNITUDE OF Z OR FNU+N-1 IS +C LARGE, LOSSES OF SIGNIFICANCE BY ARGUMENT REDUCTION OCCUR. +C CONSEQUENTLY, IF EITHER ONE EXCEEDS U1=SQRT(0.5/UR), THEN +C LOSSES EXCEEDING HALF PRECISION ARE LIKELY AND AN ERROR FLAG +C IERR=3 IS TRIGGERED WHERE UR=R1MACH(4)=UNIT ROUNDOFF. ALSO +C IF EITHER IS LARGER THAN U2=0.5/UR, THEN ALL SIGNIFICANCE IS +C LOST AND IERR=4. IN ORDER TO USE THE INT FUNCTION, ARGUMENTS +C MUST BE FURTHER RESTRICTED NOT TO EXCEED THE LARGEST MACHINE +C INTEGER, U3=I1MACH(9). THUS, THE MAGNITUDE OF Z AND FNU+N-1 IS +C RESTRICTED BY MIN(U2,U3). ON 32 BIT MACHINES, U1,U2, AND U3 +C ARE APPROXIMATELY 2.0E+3, 4.2E+6, 2.1E+9 IN SINGLE PRECISION +C ARITHMETIC AND 1.3E+8, 1.8E+16, 2.1E+9 IN DOUBLE PRECISION +C ARITHMETIC RESPECTIVELY. THIS MAKES U2 AND U3 LIMITING IN +C THEIR RESPECTIVE ARITHMETICS. THIS MEANS THAT ONE CAN EXPECT +C TO RETAIN, IN THE WORST CASES ON 32 BIT MACHINES, NO DIGITS +C IN SINGLE AND ONLY 7 DIGITS IN DOUBLE PRECISION ARITHMETIC. +C SIMILAR CONSIDERATIONS HOLD FOR OTHER MACHINES. +C +C THE APPROXIMATE RELATIVE ERROR IN THE MAGNITUDE OF A COMPLEX +C BESSEL FUNCTION CAN BE EXPRESSED BY P*10**S WHERE P=MAX(UNIT +C ROUNDOFF,1.0E-18) IS THE NOMINAL PRECISION AND 10**S REPRE- +C SENTS THE INCREASE IN ERROR DUE TO ARGUMENT REDUCTION IN THE +C ELEMENTARY FUNCTIONS. HERE, S=MAX(1,ABS(LOG10(CABS(Z))), +C ABS(LOG10(FNU))) APPROXIMATELY (I.E. S=MAX(1,ABS(EXPONENT OF +C CABS(Z),ABS(EXPONENT OF FNU)) ). HOWEVER, THE PHASE ANGLE MAY +C HAVE ONLY ABSOLUTE ACCURACY. THIS IS MOST LIKELY TO OCCUR WHEN +C ONE COMPONENT (IN ABSOLUTE VALUE) IS LARGER THAN THE OTHER BY +C SEVERAL ORDERS OF MAGNITUDE. 
IF ONE COMPONENT IS 10**K LARGER +C THAN THE OTHER, THEN ONE CAN EXPECT ONLY MAX(ABS(LOG10(P))-K, +C 0) SIGNIFICANT DIGITS; OR, STATED ANOTHER WAY, WHEN K EXCEEDS +C THE EXPONENT OF P, NO SIGNIFICANT DIGITS REMAIN IN THE SMALLER +C COMPONENT. HOWEVER, THE PHASE ANGLE RETAINS ABSOLUTE ACCURACY +C BECAUSE, IN COMPLEX ARITHMETIC WITH PRECISION P, THE SMALLER +C COMPONENT WILL NOT (AS A RULE) DECREASE BELOW P TIMES THE +C MAGNITUDE OF THE LARGER COMPONENT. IN THESE EXTREME CASES, +C THE PRINCIPAL PHASE ANGLE IS ON THE ORDER OF +P, -P, PI/2-P, +C OR -PI/2+P. +C +C***REFERENCES HANDBOOK OF MATHEMATICAL FUNCTIONS BY M. ABRAMOWITZ +C AND I. A. STEGUN, NBS AMS SERIES 55, U.S. DEPT. OF +C COMMERCE, 1955. +C +C COMPUTATION OF BESSEL FUNCTIONS OF COMPLEX ARGUMENT +C BY D. E. AMOS, SAND83-0083, MAY, 1983. +C +C COMPUTATION OF BESSEL FUNCTIONS OF COMPLEX ARGUMENT +C AND LARGE ORDER BY D. E. AMOS, SAND83-0643, MAY, 1983. +C +C A SUBROUTINE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX +C ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, SAND85- +C 1018, MAY, 1985 +C +C A PORTABLE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX +C ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, TRANS. +C MATH. SOFTWARE, 1986 +C +C***ROUTINES CALLED CACON,CBKNU,CBUNK,CUOIK,I1MACH,R1MACH +C***END PROLOGUE CBESK +C + COMPLEX CY, Z + REAL AA, ALIM, ALN, ARG, AZ, DIG, ELIM, FN, FNU, FNUL, RL, R1M5, + * TOL, UFL, XX, YY, R1MACH, BB + INTEGER IERR, K, KODE, K1, K2, MR, N, NN, NUF, NW, NZ, I1MACH + DIMENSION CY(N) +C***FIRST EXECUTABLE STATEMENT CBESK + IERR = 0 + NZ=0 + XX = REAL(Z) + YY = AIMAG(Z) + IF (YY.EQ.0.0E0 .AND. XX.EQ.0.0E0) IERR=1 + IF (FNU.LT.0.0E0) IERR=1 + IF (KODE.LT.1 .OR. KODE.GT.2) IERR=1 + IF (N.LT.1) IERR=1 + IF (IERR.NE.0) RETURN + NN = N +C----------------------------------------------------------------------- +C SET PARAMETERS RELATED TO MACHINE CONSTANTS. +C TOL IS THE APPROXIMATE UNIT ROUNDOFF LIMITED TO 1.0E-18. +C ELIM IS THE APPROXIMATE EXPONENTIAL OVER- AND UNDERFLOW LIMIT. 
+C EXP(-ELIM).LT.EXP(-ALIM)=EXP(-ELIM)/TOL AND +C EXP(ELIM).GT.EXP(ALIM)=EXP(ELIM)*TOL ARE INTERVALS NEAR +C UNDERFLOW AND OVERFLOW LIMITS WHERE SCALED ARITHMETIC IS DONE. +C RL IS THE LOWER BOUNDARY OF THE ASYMPTOTIC EXPANSION FOR LARGE Z. +C DIG = NUMBER OF BASE 10 DIGITS IN TOL = 10**(-DIG). +C FNUL IS THE LOWER BOUNDARY OF THE ASYMPTOTIC SERIES FOR LARGE FNU +C----------------------------------------------------------------------- + TOL = AMAX1(R1MACH(4),1.0E-18) + K1 = I1MACH(12) + K2 = I1MACH(13) + R1M5 = R1MACH(5) + K = MIN0(IABS(K1),IABS(K2)) + ELIM = 2.303E0*(FLOAT(K)*R1M5-3.0E0) + K1 = I1MACH(11) - 1 + AA = R1M5*FLOAT(K1) + DIG = AMIN1(AA,18.0E0) + AA = AA*2.303E0 + ALIM = ELIM + AMAX1(-AA,-41.45E0) + FNUL = 10.0E0 + 6.0E0*(DIG-3.0E0) + RL = 1.2E0*DIG + 3.0E0 + AZ = CABS(Z) + FN = FNU + FLOAT(NN-1) +C----------------------------------------------------------------------- +C TEST FOR RANGE +C----------------------------------------------------------------------- + AA = 0.5E0/TOL + BB=FLOAT(I1MACH(9))*0.5E0 + AA=AMIN1(AA,BB) + IF(AZ.GT.AA) GO TO 210 + IF(FN.GT.AA) GO TO 210 + AA=SQRT(AA) + IF(AZ.GT.AA) IERR=3 + IF(FN.GT.AA) IERR=3 +C----------------------------------------------------------------------- +C OVERFLOW TEST ON THE LAST MEMBER OF THE SEQUENCE +C----------------------------------------------------------------------- +C UFL = EXP(-ELIM) + UFL = R1MACH(1)*1.0E+3 + IF (AZ.LT.UFL) GO TO 180 + IF (FNU.GT.FNUL) GO TO 80 + IF (FN.LE.1.0E0) GO TO 60 + IF (FN.GT.2.0E0) GO TO 50 + IF (AZ.GT.TOL) GO TO 60 + ARG = 0.5E0*AZ + ALN = -FN*ALOG(ARG) + IF (ALN.GT.ELIM) GO TO 180 + GO TO 60 + 50 CONTINUE + CALL CUOIK(Z, FNU, KODE, 2, NN, CY, NUF, TOL, ELIM, ALIM) + IF (NUF.LT.0) GO TO 180 + NZ = NZ + NUF + NN = NN - NUF +C----------------------------------------------------------------------- +C HERE NN=N OR NN=0 SINCE NUF=0,NN, OR -1 ON RETURN FROM CUOIK +C IF NUF=NN, THEN CY(I)=CZERO FOR ALL I 
+C----------------------------------------------------------------------- + IF (NN.EQ.0) GO TO 100 + 60 CONTINUE + IF (XX.LT.0.0E0) GO TO 70 +C----------------------------------------------------------------------- +C RIGHT HALF PLANE COMPUTATION, REAL(Z).GE.0. +C----------------------------------------------------------------------- + CALL CBKNU(Z, FNU, KODE, NN, CY, NW, TOL, ELIM, ALIM) + IF (NW.LT.0) GO TO 200 + NZ=NW + RETURN +C----------------------------------------------------------------------- +C LEFT HALF PLANE COMPUTATION +C PI/2.LT.ARG(Z).LE.PI AND -PI.LT.ARG(Z).LT.-PI/2. +C----------------------------------------------------------------------- + 70 CONTINUE + IF (NZ.NE.0) GO TO 180 + MR = 1 + IF (YY.LT.0.0E0) MR = -1 + CALL CACON(Z, FNU, KODE, MR, NN, CY, NW, RL, FNUL, TOL, ELIM, + * ALIM) + IF (NW.LT.0) GO TO 200 + NZ=NW + RETURN +C----------------------------------------------------------------------- +C UNIFORM ASYMPTOTIC EXPANSIONS FOR FNU.GT.FNUL +C----------------------------------------------------------------------- + 80 CONTINUE + MR = 0 + IF (XX.GE.0.0E0) GO TO 90 + MR = 1 + IF (YY.LT.0.0E0) MR = -1 + 90 CONTINUE + CALL CBUNK(Z, FNU, KODE, MR, NN, CY, NW, TOL, ELIM, ALIM) + IF (NW.LT.0) GO TO 200 + NZ = NZ + NW + RETURN + 100 CONTINUE + IF (XX.LT.0.0E0) GO TO 180 + RETURN + 180 CONTINUE + NZ = 0 + IERR=2 + RETURN + 200 CONTINUE + IF(NW.EQ.(-1)) GO TO 180 + NZ=0 + IERR=5 + RETURN + 210 CONTINUE + NZ=0 + IERR=4 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cbesy.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cbesy.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,226 @@ + SUBROUTINE CBESY(Z, FNU, KODE, N, CY, NZ, CWRK, IERR) +C***BEGIN PROLOGUE CBESY +C***DATE WRITTEN 830501 (YYMMDD) +C***REVISION DATE 890801 (YYMMDD) +C***CATEGORY NO. 
B5K +C***KEYWORDS Y-BESSEL FUNCTION,BESSEL FUNCTION OF COMPLEX ARGUMENT, +C BESSEL FUNCTION OF SECOND KIND +C***AUTHOR AMOS, DONALD E., SANDIA NATIONAL LABORATORIES +C***PURPOSE TO COMPUTE THE Y-BESSEL FUNCTION OF A COMPLEX ARGUMENT +C***DESCRIPTION +C +C ON KODE=1, CBESY COMPUTES AN N MEMBER SEQUENCE OF COMPLEX +C BESSEL FUNCTIONS CY(I)=Y(FNU+I-1,Z) FOR REAL, NONNEGATIVE +C ORDERS FNU+I-1, I=1,...,N AND COMPLEX Z IN THE CUT PLANE +C -PI.LT.ARG(Z).LE.PI. ON KODE=2, CBESY RETURNS THE SCALED +C FUNCTIONS +C +C CY(I)=EXP(-ABS(Y))*Y(FNU+I-1,Z) I = 1,...,N , Y=AIMAG(Z) +C +C WHICH REMOVE THE EXPONENTIAL GROWTH IN BOTH THE UPPER AND +C LOWER HALF PLANES FOR Z TO INFINITY. DEFINITIONS AND NOTATION +C ARE FOUND IN THE NBS HANDBOOK OF MATHEMATICAL FUNCTIONS +C (REF. 1). +C +C INPUT +C Z - Z=CMPLX(X,Y), Z.NE.CMPLX(0.,0.),-PI.LT.ARG(Z).LE.PI +C FNU - ORDER OF INITIAL Y FUNCTION, FNU.GE.0.0E0 +C KODE - A PARAMETER TO INDICATE THE SCALING OPTION +C KODE= 1 RETURNS +C CY(I)=Y(FNU+I-1,Z), I=1,...,N +C = 2 RETURNS +C CY(I)=Y(FNU+I-1,Z)*EXP(-ABS(Y)), I=1,...,N +C WHERE Y=AIMAG(Z) +C N - NUMBER OF MEMBERS OF THE SEQUENCE, N.GE.1 +C CWRK - A COMPLEX WORK VECTOR OF DIMENSION AT LEAST N +C +C OUTPUT +C CY - A COMPLEX VECTOR WHOSE FIRST N COMPONENTS CONTAIN +C VALUES FOR THE SEQUENCE +C CY(I)=Y(FNU+I-1,Z) OR +C CY(I)=Y(FNU+I-1,Z)*EXP(-ABS(Y)) I=1,...,N +C DEPENDING ON KODE. 
+C           NZ     - NZ=0 , A NORMAL RETURN +C                    NZ.GT.0 , NZ COMPONENTS OF CY SET TO ZERO DUE TO +C                    UNDERFLOW (GENERALLY ON KODE=2) +C           IERR   - ERROR FLAG +C                    IERR=0, NORMAL RETURN - COMPUTATION COMPLETED +C                    IERR=1, INPUT ERROR   - NO COMPUTATION +C                    IERR=2, OVERFLOW      - NO COMPUTATION, FNU+N-1 IS +C                            TOO LARGE OR CABS(Z) IS TOO SMALL OR BOTH +C                    IERR=3, CABS(Z) OR FNU+N-1 LARGE - COMPUTATION DONE +C                            BUT LOSSES OF SIGNIFICANCE BY ARGUMENT +C                            REDUCTION PRODUCE LESS THAN HALF OF MACHINE +C                            ACCURACY +C                    IERR=4, CABS(Z) OR FNU+N-1 TOO LARGE - NO COMPUTA- +C                            TION BECAUSE OF COMPLETE LOSSES OF SIGNIFI- +C                            CANCE BY ARGUMENT REDUCTION +C                    IERR=5, ERROR              - NO COMPUTATION, +C                            ALGORITHM TERMINATION CONDITION NOT MET +C +C***LONG DESCRIPTION +C +C         THE COMPUTATION IS CARRIED OUT BY THE FORMULA +C +C         Y(FNU,Z)=0.5*(H(1,FNU,Z)-H(2,FNU,Z))/I +C +C         WHERE I**2 = -1 AND THE HANKEL BESSEL FUNCTIONS H(1,FNU,Z) +C         AND H(2,FNU,Z) ARE CALCULATED IN CBESH. +C +C         FOR NEGATIVE ORDERS,THE FORMULA +C +C         Y(-FNU,Z) = Y(FNU,Z)*COS(PI*FNU) + J(FNU,Z)*SIN(PI*FNU) +C +C         CAN BE USED. HOWEVER,FOR LARGE ORDERS CLOSE TO HALF ODD +C         INTEGERS THE FUNCTION CHANGES RADICALLY. WHEN FNU IS A LARGE +C         POSITIVE HALF ODD INTEGER,THE MAGNITUDE OF Y(-FNU,Z)=J(FNU,Z)* +C         SIN(PI*FNU) IS A LARGE NEGATIVE POWER OF TEN. BUT WHEN FNU IS +C         NOT A HALF ODD INTEGER, Y(FNU,Z) DOMINATES IN MAGNITUDE WITH A +C         LARGE POSITIVE POWER OF TEN AND THE MOST THAT THE SECOND TERM +C         CAN BE REDUCED IS BY UNIT ROUNDOFF FROM THE COEFFICIENT. THUS, +C         WIDE CHANGES CAN OCCUR WITHIN UNIT ROUNDOFF OF A LARGE HALF +C         ODD INTEGER. HERE, LARGE MEANS FNU.GT.CABS(Z). +C +C         IN MOST COMPLEX VARIABLE COMPUTATION, ONE MUST EVALUATE ELE- +C         MENTARY FUNCTIONS. WHEN THE MAGNITUDE OF Z OR FNU+N-1 IS +C         LARGE, LOSSES OF SIGNIFICANCE BY ARGUMENT REDUCTION OCCUR. +C         CONSEQUENTLY, IF EITHER ONE EXCEEDS U1=SQRT(0.5/UR), THEN +C         LOSSES EXCEEDING HALF PRECISION ARE LIKELY AND AN ERROR FLAG +C         IERR=3 IS TRIGGERED WHERE UR=R1MACH(4)=UNIT ROUNDOFF.
ALSO +C IF EITHER IS LARGER THAN U2=0.5/UR, THEN ALL SIGNIFICANCE IS +C LOST AND IERR=4. IN ORDER TO USE THE INT FUNCTION, ARGUMENTS +C MUST BE FURTHER RESTRICTED NOT TO EXCEED THE LARGEST MACHINE +C INTEGER, U3=I1MACH(9). THUS, THE MAGNITUDE OF Z AND FNU+N-1 IS +C RESTRICTED BY MIN(U2,U3). ON 32 BIT MACHINES, U1,U2, AND U3 +C ARE APPROXIMATELY 2.0E+3, 4.2E+6, 2.1E+9 IN SINGLE PRECISION +C ARITHMETIC AND 1.3E+8, 1.8E+16, 2.1E+9 IN DOUBLE PRECISION +C ARITHMETIC RESPECTIVELY. THIS MAKES U2 AND U3 LIMITING IN +C THEIR RESPECTIVE ARITHMETICS. THIS MEANS THAT ONE CAN EXPECT +C TO RETAIN, IN THE WORST CASES ON 32 BIT MACHINES, NO DIGITS +C IN SINGLE AND ONLY 7 DIGITS IN DOUBLE PRECISION ARITHMETIC. +C SIMILAR CONSIDERATIONS HOLD FOR OTHER MACHINES. +C +C THE APPROXIMATE RELATIVE ERROR IN THE MAGNITUDE OF A COMPLEX +C BESSEL FUNCTION CAN BE EXPRESSED BY P*10**S WHERE P=MAX(UNIT +C ROUNDOFF,1.0E-18) IS THE NOMINAL PRECISION AND 10**S REPRE- +C SENTS THE INCREASE IN ERROR DUE TO ARGUMENT REDUCTION IN THE +C ELEMENTARY FUNCTIONS. HERE, S=MAX(1,ABS(LOG10(CABS(Z))), +C ABS(LOG10(FNU))) APPROXIMATELY (I.E. S=MAX(1,ABS(EXPONENT OF +C CABS(Z),ABS(EXPONENT OF FNU)) ). HOWEVER, THE PHASE ANGLE MAY +C HAVE ONLY ABSOLUTE ACCURACY. THIS IS MOST LIKELY TO OCCUR WHEN +C ONE COMPONENT (IN ABSOLUTE VALUE) IS LARGER THAN THE OTHER BY +C SEVERAL ORDERS OF MAGNITUDE. IF ONE COMPONENT IS 10**K LARGER +C THAN THE OTHER, THEN ONE CAN EXPECT ONLY MAX(ABS(LOG10(P))-K, +C 0) SIGNIFICANT DIGITS; OR, STATED ANOTHER WAY, WHEN K EXCEEDS +C THE EXPONENT OF P, NO SIGNIFICANT DIGITS REMAIN IN THE SMALLER +C COMPONENT. HOWEVER, THE PHASE ANGLE RETAINS ABSOLUTE ACCURACY +C BECAUSE, IN COMPLEX ARITHMETIC WITH PRECISION P, THE SMALLER +C COMPONENT WILL NOT (AS A RULE) DECREASE BELOW P TIMES THE +C MAGNITUDE OF THE LARGER COMPONENT. IN THESE EXTREME CASES, +C THE PRINCIPAL PHASE ANGLE IS ON THE ORDER OF +P, -P, PI/2-P, +C OR -PI/2+P. +C +C***REFERENCES HANDBOOK OF MATHEMATICAL FUNCTIONS BY M. 
ABRAMOWITZ +C AND I. A. STEGUN, NBS AMS SERIES 55, U.S. DEPT. OF +C COMMERCE, 1955. +C +C COMPUTATION OF BESSEL FUNCTIONS OF COMPLEX ARGUMENT +C BY D. E. AMOS, SAND83-0083, MAY, 1983. +C +C COMPUTATION OF BESSEL FUNCTIONS OF COMPLEX ARGUMENT +C AND LARGE ORDER BY D. E. AMOS, SAND83-0643, MAY, 1983 +C +C A SUBROUTINE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX +C ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, SAND85- +C 1018, MAY, 1985 +C +C A PORTABLE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX +C ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, TRANS. +C MATH. SOFTWARE, 1986 +C +C***ROUTINES CALLED CBESH,I1MACH,R1MACH +C***END PROLOGUE CBESY +C + COMPLEX CWRK, CY, C1, C2, EX, HCI, Z, ZU, ZV + REAL ELIM, EY, FNU, R1, R2, TAY, XX, YY, R1MACH, ASCLE, RTOL, + * ATOL, AA, BB + INTEGER I, IERR, K, KODE, K1, K2, N, NZ, NZ1, NZ2, I1MACH + DIMENSION CY(N), CWRK(N) +C***FIRST EXECUTABLE STATEMENT CBESY + XX = REAL(Z) + YY = AIMAG(Z) + IERR = 0 + NZ=0 + IF (XX.EQ.0.0E0 .AND. YY.EQ.0.0E0) IERR=1 + IF (FNU.LT.0.0E0) IERR=1 + IF (KODE.LT.1 .OR. 
KODE.GT.2) IERR=1 + IF (N.LT.1) IERR=1 + IF (IERR.NE.0) RETURN + HCI = CMPLX(0.0E0,0.5E0) + CALL CBESH(Z, FNU, KODE, 1, N, CY, NZ1, IERR) + IF (IERR.NE.0.AND.IERR.NE.3) GO TO 170 + CALL CBESH(Z, FNU, KODE, 2, N, CWRK, NZ2, IERR) + IF (IERR.NE.0.AND.IERR.NE.3) GO TO 170 + NZ = MIN0(NZ1,NZ2) + IF (KODE.EQ.2) GO TO 60 + DO 50 I=1,N + CY(I) = HCI*(CWRK(I)-CY(I)) + 50 CONTINUE + RETURN + 60 CONTINUE + TOL = AMAX1(R1MACH(4),1.0E-18) + K1 = I1MACH(12) + K2 = I1MACH(13) + K = MIN0(IABS(K1),IABS(K2)) + R1M5 = R1MACH(5) +C----------------------------------------------------------------------- +C ELIM IS THE APPROXIMATE EXPONENTIAL UNDER- AND OVERFLOW LIMIT +C----------------------------------------------------------------------- + ELIM = 2.303E0*(FLOAT(K)*R1M5-3.0E0) + R1 = COS(XX) + R2 = SIN(XX) + EX = CMPLX(R1,R2) + EY = 0.0E0 + TAY = ABS(YY+YY) + IF (TAY.LT.ELIM) EY = EXP(-TAY) + IF (YY.LT.0.0E0) GO TO 90 + C1 = EX*CMPLX(EY,0.0E0) + C2 = CONJG(EX) + 70 CONTINUE + NZ = 0 + RTOL = 1.0E0/TOL + ASCLE = R1MACH(1)*RTOL*1.0E+3 + DO 80 I=1,N +C CY(I) = HCI*(C2*CWRK(I)-C1*CY(I)) + ZV = CWRK(I) + AA=REAL(ZV) + BB=AIMAG(ZV) + ATOL=1.0E0 + IF (AMAX1(ABS(AA),ABS(BB)).GT.ASCLE) GO TO 75 + ZV = ZV*CMPLX(RTOL,0.0E0) + ATOL = TOL + 75 CONTINUE + ZV = ZV*C2*HCI + ZV = ZV*CMPLX(ATOL,0.0E0) + ZU=CY(I) + AA=REAL(ZU) + BB=AIMAG(ZU) + ATOL=1.0E0 + IF (AMAX1(ABS(AA),ABS(BB)).GT.ASCLE) GO TO 85 + ZU = ZU*CMPLX(RTOL,0.0E0) + ATOL = TOL + 85 CONTINUE + ZU = ZU*C1*HCI + ZU = ZU*CMPLX(ATOL,0.0E0) + CY(I) = ZV - ZU + IF (CY(I).EQ.CMPLX(0.0E0,0.0E0) .AND. 
EY.EQ.0.0E0) NZ = NZ + 1 + 80 CONTINUE + RETURN + 90 CONTINUE + C1 = EX + C2 = CONJG(EX)*CMPLX(EY,0.0E0) + GO TO 70 + 170 CONTINUE + NZ = 0 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cbinu.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cbinu.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,105 @@ + SUBROUTINE CBINU(Z, FNU, KODE, N, CY, NZ, RL, FNUL, TOL, ELIM, + * ALIM) +C***BEGIN PROLOGUE CBINU +C***REFER TO CBESH,CBESI,CBESJ,CBESK,CAIRY,CBIRY +C +C CBINU COMPUTES THE I FUNCTION IN THE RIGHT HALF Z PLANE +C +C***ROUTINES CALLED CASYI,CBUNI,CMLRI,CSERI,CUOIK,CWRSK +C***END PROLOGUE CBINU + COMPLEX CW, CY, CZERO, Z + REAL ALIM, AZ, DFNU, ELIM, FNU, FNUL, RL, TOL + INTEGER I, INW, KODE, N, NLAST, NN, NUI, NW, NZ + DIMENSION CY(N), CW(2) + DATA CZERO / (0.0E0,0.0E0) / +C + NZ = 0 + AZ = CABS(Z) + NN = N + DFNU = FNU + FLOAT(N-1) + IF (AZ.LE.2.0E0) GO TO 10 + IF (AZ*AZ*0.25E0.GT.DFNU+1.0E0) GO TO 20 + 10 CONTINUE +C----------------------------------------------------------------------- +C POWER SERIES +C----------------------------------------------------------------------- + CALL CSERI(Z, FNU, KODE, NN, CY, NW, TOL, ELIM, ALIM) + INW = IABS(NW) + NZ = NZ + INW + NN = NN - INW + IF (NN.EQ.0) RETURN + IF (NW.GE.0) GO TO 120 + DFNU = FNU + FLOAT(NN-1) + 20 CONTINUE + IF (AZ.LT.RL) GO TO 40 + IF (DFNU.LE.1.0E0) GO TO 30 + IF (AZ+AZ.LT.DFNU*DFNU) GO TO 50 +C----------------------------------------------------------------------- +C ASYMPTOTIC EXPANSION FOR LARGE Z +C----------------------------------------------------------------------- + 30 CONTINUE + CALL CASYI(Z, FNU, KODE, NN, CY, NW, RL, TOL, ELIM, ALIM) + IF (NW.LT.0) GO TO 130 + GO TO 120 + 40 CONTINUE + IF (DFNU.LE.1.0E0) GO TO 70 + 50 CONTINUE +C----------------------------------------------------------------------- +C OVERFLOW AND UNDERFLOW TEST ON I SEQUENCE FOR MILLER ALGORITHM +C----------------------------------------------------------------------- + CALL CUOIK(Z, FNU, 
KODE, 1, NN, CY, NW, TOL, ELIM, ALIM) + IF (NW.LT.0) GO TO 130 + NZ = NZ + NW + NN = NN - NW + IF (NN.EQ.0) RETURN + DFNU = FNU+FLOAT(NN-1) + IF (DFNU.GT.FNUL) GO TO 110 + IF (AZ.GT.FNUL) GO TO 110 + 60 CONTINUE + IF (AZ.GT.RL) GO TO 80 + 70 CONTINUE +C----------------------------------------------------------------------- +C MILLER ALGORITHM NORMALIZED BY THE SERIES +C----------------------------------------------------------------------- + CALL CMLRI(Z, FNU, KODE, NN, CY, NW, TOL) + IF(NW.LT.0) GO TO 130 + GO TO 120 + 80 CONTINUE +C----------------------------------------------------------------------- +C MILLER ALGORITHM NORMALIZED BY THE WRONSKIAN +C----------------------------------------------------------------------- +C----------------------------------------------------------------------- +C OVERFLOW TEST ON K FUNCTIONS USED IN WRONSKIAN +C----------------------------------------------------------------------- + CALL CUOIK(Z, FNU, KODE, 2, 2, CW, NW, TOL, ELIM, ALIM) + IF (NW.GE.0) GO TO 100 + NZ = NN + DO 90 I=1,NN + CY(I) = CZERO + 90 CONTINUE + RETURN + 100 CONTINUE + IF (NW.GT.0) GO TO 130 + CALL CWRSK(Z, FNU, KODE, NN, CY, NW, CW, TOL, ELIM, ALIM) + IF (NW.LT.0) GO TO 130 + GO TO 120 + 110 CONTINUE +C----------------------------------------------------------------------- +C INCREMENT FNU+NN-1 UP TO FNUL, COMPUTE AND RECUR BACKWARD +C----------------------------------------------------------------------- + NUI = INT(FNUL-DFNU) + 1 + NUI = MAX0(NUI,0) + CALL CBUNI(Z, FNU, KODE, NN, CY, NW, NUI, NLAST, FNUL, TOL, ELIM, + * ALIM) + IF (NW.LT.0) GO TO 130 + NZ = NZ + NW + IF (NLAST.EQ.0) GO TO 120 + NN = NLAST + GO TO 60 + 120 CONTINUE + RETURN + 130 CONTINUE + NZ = -1 + IF(NW.EQ.(-2)) NZ=-2 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cbiry.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cbiry.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,309 @@ + SUBROUTINE CBIRY(Z, ID, KODE, BI, IERR) +C***BEGIN PROLOGUE CBIRY 
+C***DATE WRITTEN   830501   (YYMMDD) +C***REVISION DATE  890801   (YYMMDD) +C***CATEGORY NO.  B5K +C***KEYWORDS  AIRY FUNCTION,BESSEL FUNCTIONS OF ORDER ONE THIRD +C***AUTHOR  AMOS, DONALD E., SANDIA NATIONAL LABORATORIES +C***PURPOSE  TO COMPUTE AIRY FUNCTIONS BI(Z) AND DBI(Z) FOR COMPLEX Z +C***DESCRIPTION +C +C         ON KODE=1, CBIRY COMPUTES THE COMPLEX AIRY FUNCTION BI(Z) OR +C         ITS DERIVATIVE DBI(Z)/DZ ON ID=0 OR ID=1 RESPECTIVELY. ON +C         KODE=2, A SCALING OPTION CEXP(-AXZTA)*BI(Z) OR CEXP(-AXZTA)* +C         DBI(Z)/DZ IS PROVIDED TO REMOVE THE EXPONENTIAL BEHAVIOR IN +C         BOTH THE LEFT AND RIGHT HALF PLANES WHERE +C         ZTA=(2/3)*Z*CSQRT(Z)=CMPLX(XZTA,YZTA) AND AXZTA=ABS(XZTA). +C         DEFINITIONS AND NOTATION ARE FOUND IN THE NBS HANDBOOK OF +C         MATHEMATICAL FUNCTIONS (REF. 1). +C +C         INPUT +C           Z      - Z=CMPLX(X,Y) +C           ID     - ORDER OF DERIVATIVE, ID=0 OR ID=1 +C           KODE   - A PARAMETER TO INDICATE THE SCALING OPTION +C                    KODE= 1  RETURNS +C                             BI=BI(Z)                 ON ID=0 OR +C                             BI=DBI(Z)/DZ             ON ID=1 +C                        = 2  RETURNS +C                             BI=CEXP(-AXZTA)*BI(Z)     ON ID=0 OR +C                             BI=CEXP(-AXZTA)*DBI(Z)/DZ ON ID=1 WHERE +C                             ZTA=(2/3)*Z*CSQRT(Z)=CMPLX(XZTA,YZTA) +C                             AND AXZTA=ABS(XZTA) +C +C         OUTPUT +C           BI     - COMPLEX ANSWER DEPENDING ON THE CHOICES FOR ID AND +C                    KODE +C           IERR   - ERROR FLAG +C                    IERR=0, NORMAL RETURN - COMPUTATION COMPLETED +C                    IERR=1, INPUT ERROR   - NO COMPUTATION +C                    IERR=2, OVERFLOW      - NO COMPUTATION, REAL(Z) +C                            TOO LARGE WITH KODE=1 +C                    IERR=3, CABS(Z) LARGE      - COMPUTATION COMPLETED +C                            LOSSES OF SIGNIFICANCE BY ARGUMENT REDUCTION +C                            PRODUCE LESS THAN HALF OF MACHINE ACCURACY +C                    IERR=4, CABS(Z) TOO LARGE  - NO COMPUTATION +C                            COMPLETE LOSS OF ACCURACY BY ARGUMENT +C                            REDUCTION +C                    IERR=5, ERROR              - NO COMPUTATION, +C                            ALGORITHM TERMINATION CONDITION NOT MET +C +C***LONG DESCRIPTION +C +C         BI AND DBI ARE COMPUTED FOR CABS(Z).GT.1.0 FROM THE I BESSEL +C         FUNCTIONS BY +C +C                BI(Z)=C*SQRT(Z)*( I(-1/3,ZTA) + I(1/3,ZTA) ) +C               DBI(Z)=C *   Z  * ( I(-2/3,ZTA) + I(2/3,ZTA) ) +C                               C=1.0/SQRT(3.0) +C                             ZTA=(2/3)*Z**(3/2) +C +C         WITH THE POWER SERIES FOR CABS(Z).LE.1.0.
+C +C IN MOST COMPLEX VARIABLE COMPUTATION, ONE MUST EVALUATE ELE- +C MENTARY FUNCTIONS. WHEN THE MAGNITUDE OF Z IS LARGE, LOSSES +C OF SIGNIFICANCE BY ARGUMENT REDUCTION OCCUR. CONSEQUENTLY, IF +C THE MAGNITUDE OF ZETA=(2/3)*Z**1.5 EXCEEDS U1=SQRT(0.5/UR), +C THEN LOSSES EXCEEDING HALF PRECISION ARE LIKELY AND AN ERROR +C FLAG IERR=3 IS TRIGGERED WHERE UR=R1MACH(4)=UNIT ROUNDOFF. +C ALSO, IF THE MAGNITUDE OF ZETA IS LARGER THAN U2=0.5/UR, THEN +C ALL SIGNIFICANCE IS LOST AND IERR=4. IN ORDER TO USE THE INT +C FUNCTION, ZETA MUST BE FURTHER RESTRICTED NOT TO EXCEED THE +C LARGEST INTEGER, U3=I1MACH(9). THUS, THE MAGNITUDE OF ZETA +C MUST BE RESTRICTED BY MIN(U2,U3). ON 32 BIT MACHINES, U1,U2, +C AND U3 ARE APPROXIMATELY 2.0E+3, 4.2E+6, 2.1E+9 IN SINGLE +C PRECISION ARITHMETIC AND 1.3E+8, 1.8E+16, 2.1E+9 IN DOUBLE +C PRECISION ARITHMETIC RESPECTIVELY. THIS MAKES U2 AND U3 LIMIT- +C ING IN THEIR RESPECTIVE ARITHMETICS. THIS MEANS THAT THE MAG- +C NITUDE OF Z CANNOT EXCEED 3.1E+4 IN SINGLE AND 2.1E+6 IN +C DOUBLE PRECISION ARITHMETIC. THIS ALSO MEANS THAT ONE CAN +C EXPECT TO RETAIN, IN THE WORST CASES ON 32 BIT MACHINES, +C NO DIGITS IN SINGLE PRECISION AND ONLY 7 DIGITS IN DOUBLE +C PRECISION ARITHMETIC. +C +C THE APPROXIMATE RELATIVE ERROR IN THE MAGNITUDE OF A COMPLEX +C BESSEL FUNCTION CAN BE EXPRESSED BY P*10**S WHERE P=MAX(UNIT +C ROUNDOFF,1.0E-18) IS THE NOMINAL PRECISION AND 10**S REPRE- +C SENTS THE INCREASE IN ERROR DUE TO ARGUMENT REDUCTION IN THE +C ELEMENTARY FUNCTIONS. HERE, S=MAX(1,ABS(LOG10(CABS(Z))), +C ABS(LOG10(FNU))) APPROXIMATELY (I.E. S=MAX(1,ABS(EXPONENT OF +C CABS(Z),ABS(EXPONENT OF FNU)) ). HOWEVER, THE PHASE ANGLE MAY +C HAVE ONLY ABSOLUTE ACCURACY. THIS IS MOST LIKELY TO OCCUR WHEN +C ONE COMPONENT (IN ABSOLUTE VALUE) IS LARGER THAN THE OTHER BY +C SEVERAL ORDERS OF MAGNITUDE. 
IF ONE COMPONENT IS 10**K LARGER +C THAN THE OTHER, THEN ONE CAN EXPECT ONLY MAX(ABS(LOG10(P))-K, +C 0) SIGNIFICANT DIGITS; OR, STATED ANOTHER WAY, WHEN K EXCEEDS +C THE EXPONENT OF P, NO SIGNIFICANT DIGITS REMAIN IN THE SMALLER +C COMPONENT. HOWEVER, THE PHASE ANGLE RETAINS ABSOLUTE ACCURACY +C BECAUSE, IN COMPLEX ARITHMETIC WITH PRECISION P, THE SMALLER +C COMPONENT WILL NOT (AS A RULE) DECREASE BELOW P TIMES THE +C MAGNITUDE OF THE LARGER COMPONENT. IN THESE EXTREME CASES, +C THE PRINCIPAL PHASE ANGLE IS ON THE ORDER OF +P, -P, PI/2-P, +C OR -PI/2+P. +C +C***REFERENCES HANDBOOK OF MATHEMATICAL FUNCTIONS BY M. ABRAMOWITZ +C AND I. A. STEGUN, NBS AMS SERIES 55, U.S. DEPT. OF +C COMMERCE, 1955. +C +C COMPUTATION OF BESSEL FUNCTIONS OF COMPLEX ARGUMENT +C AND LARGE ORDER BY D. E. AMOS, SAND83-0643, MAY, 1983 +C +C A SUBROUTINE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX +C ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, SAND85- +C 1018, MAY, 1985 +C +C A PORTABLE PACKAGE FOR BESSEL FUNCTIONS OF A COMPLEX +C ARGUMENT AND NONNEGATIVE ORDER BY D. E. AMOS, TRANS. +C MATH. SOFTWARE, 1986 +C +C***ROUTINES CALLED CBINU,I1MACH,R1MACH +C***END PROLOGUE CBIRY + COMPLEX BI, CONE, CSQ, CY, S1, S2, TRM1, TRM2, Z, ZTA, Z3 + REAL AA, AD, AK, ALIM, ATRM, AZ, AZ3, BB, BK, CK, COEF, C1, C2, + * DIG, DK, D1, D2, ELIM, FID, FMR, FNU, FNUL, PI, RL, R1M5, SFAC, + * TOL, TTH, ZI, ZR, Z3I, Z3R, R1MACH + INTEGER ID, IERR, K, KODE, K1, K2, NZ, I1MACH + DIMENSION CY(2) + DATA TTH, C1, C2, COEF, PI /6.66666666666666667E-01, + * 6.14926627446000736E-01,4.48288357353826359E-01, + * 5.77350269189625765E-01,3.14159265358979324E+00/ + DATA CONE / (1.0E0,0.0E0) / +C***FIRST EXECUTABLE STATEMENT CBIRY + IERR = 0 + NZ=0 + IF (ID.LT.0 .OR. ID.GT.1) IERR=1 + IF (KODE.LT.1 .OR. 
KODE.GT.2) IERR=1 + IF (IERR.NE.0) RETURN + AZ = CABS(Z) + TOL = AMAX1(R1MACH(4),1.0E-18) + FID = FLOAT(ID) + IF (AZ.GT.1.0E0) GO TO 60 +C----------------------------------------------------------------------- +C POWER SERIES FOR CABS(Z).LE.1. +C----------------------------------------------------------------------- + S1 = CONE + S2 = CONE + IF (AZ.LT.TOL) GO TO 110 + AA = AZ*AZ + IF (AA.LT.TOL/AZ) GO TO 40 + TRM1 = CONE + TRM2 = CONE + ATRM = 1.0E0 + Z3 = Z*Z*Z + AZ3 = AZ*AA + AK = 2.0E0 + FID + BK = 3.0E0 - FID - FID + CK = 4.0E0 - FID + DK = 3.0E0 + FID + FID + D1 = AK*DK + D2 = BK*CK + AD = AMIN1(D1,D2) + AK = 24.0E0 + 9.0E0*FID + BK = 30.0E0 - 9.0E0*FID + Z3R = REAL(Z3) + Z3I = AIMAG(Z3) + DO 30 K=1,25 + TRM1 = TRM1*CMPLX(Z3R/D1,Z3I/D1) + S1 = S1 + TRM1 + TRM2 = TRM2*CMPLX(Z3R/D2,Z3I/D2) + S2 = S2 + TRM2 + ATRM = ATRM*AZ3/AD + D1 = D1 + AK + D2 = D2 + BK + AD = AMIN1(D1,D2) + IF (ATRM.LT.TOL*AD) GO TO 40 + AK = AK + 18.0E0 + BK = BK + 18.0E0 + 30 CONTINUE + 40 CONTINUE + IF (ID.EQ.1) GO TO 50 + BI = S1*CMPLX(C1,0.0E0) + Z*S2*CMPLX(C2,0.0E0) + IF (KODE.EQ.1) RETURN + ZTA = Z*CSQRT(Z)*CMPLX(TTH,0.0E0) + AA = REAL(ZTA) + AA = -ABS(AA) + BI = BI*CMPLX(EXP(AA),0.0E0) + RETURN + 50 CONTINUE + BI = S2*CMPLX(C2,0.0E0) + IF (AZ.GT.TOL) BI = BI + Z*Z*S1*CMPLX(C1/(1.0E0+FID),0.0E0) + IF (KODE.EQ.1) RETURN + ZTA = Z*CSQRT(Z)*CMPLX(TTH,0.0E0) + AA = REAL(ZTA) + AA = -ABS(AA) + BI = BI*CMPLX(EXP(AA),0.0E0) + RETURN +C----------------------------------------------------------------------- +C CASE FOR CABS(Z).GT.1.0 +C----------------------------------------------------------------------- + 60 CONTINUE + FNU = (1.0E0+FID)/3.0E0 +C----------------------------------------------------------------------- +C SET PARAMETERS RELATED TO MACHINE CONSTANTS. +C TOL IS THE APPROXIMATE UNIT ROUNDOFF LIMITED TO 1.0E-18. +C ELIM IS THE APPROXIMATE EXPONENTIAL OVER- AND UNDERFLOW LIMIT. 
+C EXP(-ELIM).LT.EXP(-ALIM)=EXP(-ELIM)/TOL AND +C EXP(ELIM).GT.EXP(ALIM)=EXP(ELIM)*TOL ARE INTERVALS NEAR +C UNDERFLOW AND OVERFLOW LIMITS WHERE SCALED ARITHMETIC IS DONE. +C RL IS THE LOWER BOUNDARY OF THE ASYMPTOTIC EXPANSION FOR LARGE Z. +C DIG = NUMBER OF BASE 10 DIGITS IN TOL = 10**(-DIG). +C FNUL IS THE LOWER BOUNDARY OF THE ASYMPTOTIC SERIES FOR LARGE FNU. +C----------------------------------------------------------------------- + K1 = I1MACH(12) + K2 = I1MACH(13) + R1M5 = R1MACH(5) + K = MIN0(IABS(K1),IABS(K2)) + ELIM = 2.303E0*(FLOAT(K)*R1M5-3.0E0) + K1 = I1MACH(11) - 1 + AA = R1M5*FLOAT(K1) + DIG = AMIN1(AA,18.0E0) + AA = AA*2.303E0 + ALIM = ELIM + AMAX1(-AA,-41.45E0) + RL = 1.2E0*DIG + 3.0E0 + FNUL = 10.0E0 + 6.0E0*(DIG-3.0E0) +C----------------------------------------------------------------------- +C TEST FOR RANGE +C----------------------------------------------------------------------- + AA=0.5E0/TOL + BB=FLOAT(I1MACH(9))*0.5E0 + AA=AMIN1(AA,BB) + AA=AA**TTH + IF (AZ.GT.AA) GO TO 190 + AA=SQRT(AA) + IF (AZ.GT.AA) IERR=3 + CSQ=CSQRT(Z) + ZTA=Z*CSQ*CMPLX(TTH,0.0E0) +C----------------------------------------------------------------------- +C RE(ZTA).LE.0 WHEN RE(Z).LT.0, ESPECIALLY WHEN IM(Z) IS SMALL +C----------------------------------------------------------------------- + SFAC = 1.0E0 + ZI = AIMAG(Z) + ZR = REAL(Z) + AK = AIMAG(ZTA) + IF (ZR.GE.0.0E0) GO TO 70 + BK = REAL(ZTA) + CK = -ABS(BK) + ZTA = CMPLX(CK,AK) + 70 CONTINUE + IF (ZI.EQ.0.0E0 .AND. ZR.LE.0.0E0) ZTA = CMPLX(0.0E0,AK) + AA = REAL(ZTA) + IF (KODE.EQ.2) GO TO 80 +C----------------------------------------------------------------------- +C OVERFLOW TEST +C----------------------------------------------------------------------- + BB = ABS(AA) + IF (BB.LT.ALIM) GO TO 80 + BB = BB + 0.25E0*ALOG(AZ) + SFAC = TOL + IF (BB.GT.ELIM) GO TO 170 + 80 CONTINUE + FMR = 0.0E0 + IF (AA.GE.0.0E0 .AND. 
ZR.GT.0.0E0) GO TO 90 + FMR = PI + IF (ZI.LT.0.0E0) FMR = -PI + ZTA = -ZTA + 90 CONTINUE +C----------------------------------------------------------------------- +C AA=FACTOR FOR ANALYTIC CONTINUATION OF I(FNU,ZTA) +C KODE=2 RETURNS EXP(-ABS(XZTA))*I(FNU,ZTA) FROM CBINU +C----------------------------------------------------------------------- + CALL CBINU(ZTA, FNU, KODE, 1, CY, NZ, RL, FNUL, TOL, ELIM, ALIM) + IF (NZ.LT.0) GO TO 180 + AA = FMR*FNU + Z3 = CMPLX(SFAC,0.0E0) + S1 = CY(1)*CMPLX(COS(AA),SIN(AA))*Z3 + FNU = (2.0E0-FID)/3.0E0 + CALL CBINU(ZTA, FNU, KODE, 2, CY, NZ, RL, FNUL, TOL, ELIM, ALIM) + CY(1) = CY(1)*Z3 + CY(2) = CY(2)*Z3 +C----------------------------------------------------------------------- +C BACKWARD RECUR ONE STEP FOR ORDERS -1/3 OR -2/3 +C----------------------------------------------------------------------- + S2 = CY(1)*CMPLX(FNU+FNU,0.0E0)/ZTA + CY(2) + AA = FMR*(FNU-1.0E0) + S1 = (S1+S2*CMPLX(COS(AA),SIN(AA)))*CMPLX(COEF,0.0E0) + IF (ID.EQ.1) GO TO 100 + S1 = CSQ*S1 + BI = S1*CMPLX(1.0E0/SFAC,0.0E0) + RETURN + 100 CONTINUE + S1 = Z*S1 + BI = S1*CMPLX(1.0E0/SFAC,0.0E0) + RETURN + 110 CONTINUE + AA = C1*(1.0E0-FID) + FID*C2 + BI = CMPLX(AA,0.0E0) + RETURN + 170 CONTINUE + NZ=0 + IERR=2 + RETURN + 180 CONTINUE + IF(NZ.EQ.(-1)) GO TO 170 + NZ=0 + IERR=5 + RETURN + 190 CONTINUE + IERR=4 + NZ=0 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cbknu.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cbknu.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,455 @@ + SUBROUTINE CBKNU(Z, FNU, KODE, N, Y, NZ, TOL, ELIM, ALIM) +C***BEGIN PROLOGUE CBKNU +C***REFER TO CBESI,CBESK,CAIRY,CBESH +C +C CBKNU COMPUTES THE K BESSEL FUNCTION IN THE RIGHT HALF Z PLANE +C +C***ROUTINES CALLED CKSCL,CSHCH,GAMLN,I1MACH,R1MACH,CUCHK +C***END PROLOGUE CBKNU +C + COMPLEX CCH, CK, COEF, CONE, CRSC, CS, CSCL, CSH, CSR, CSS, CTWO, + * CZ, CZERO, F, FMU, P, PT, P1, P2, Q, RZ, SMU, ST, S1, S2, Y, Z, + * ZD, CELM, CY + REAL AA, AK, ALIM, ASCLE, 
A1, A2, BB, BK, BRY, CAZ, CC, DNU, + * DNU2, ELIM, ETEST, FC, FHS, FK, FKS, FNU, FPI, G1, G2, HPI, PI, + * P2I, P2M, P2R, RK, RTHPI, R1, S, SPI, TM, TOL, TTH, T1, T2, XX, + * YY, GAMLN, R1MACH, HELIM, ELM, XD, YD, ALAS, AS + INTEGER I, IDUM, IFLAG, INU, K, KFLAG, KK, KMAX, KODE, KODED, N, + * NZ, I1MACH, NW, J, IC, INUB + DIMENSION BRY(3), CC(8), CSS(3), CSR(3), Y(N), CY(2) +C + DATA KMAX / 30 / + DATA R1 / 2.0E0 / + DATA CZERO,CONE,CTWO /(0.0E0,0.0E0),(1.0E0,0.0E0),(2.0E0,0.0E0)/ +C + DATA PI, RTHPI, SPI ,HPI, FPI, TTH / + 1 3.14159265358979324E0, 1.25331413731550025E0, + 2 1.90985931710274403E0, 1.57079632679489662E0, + 3 1.89769999331517738E0, 6.66666666666666666E-01/ +C + DATA CC(1), CC(2), CC(3), CC(4), CC(5), CC(6), CC(7), CC(8)/ + 1 5.77215664901532861E-01, -4.20026350340952355E-02, + 2 -4.21977345555443367E-02, 7.21894324666309954E-03, + 3 -2.15241674114950973E-04, -2.01348547807882387E-05, + 4 1.13302723198169588E-06, 6.11609510448141582E-09/ +C + XX = REAL(Z) + YY = AIMAG(Z) + CAZ = CABS(Z) + CSCL = CMPLX(1.0E0/TOL,0.0E0) + CRSC = CMPLX(TOL,0.0E0) + CSS(1) = CSCL + CSS(2) = CONE + CSS(3) = CRSC + CSR(1) = CRSC + CSR(2) = CONE + CSR(3) = CSCL + BRY(1) = 1.0E+3*R1MACH(1)/TOL + BRY(2) = 1.0E0/BRY(1) + BRY(3) = R1MACH(2) + NZ = 0 + IFLAG = 0 + KODED = KODE + RZ = CTWO/Z + INU = INT(FNU+0.5E0) + DNU = FNU - FLOAT(INU) + IF (ABS(DNU).EQ.0.5E0) GO TO 110 + DNU2 = 0.0E0 + IF (ABS(DNU).GT.TOL) DNU2 = DNU*DNU + IF (CAZ.GT.R1) GO TO 110 +C----------------------------------------------------------------------- +C SERIES FOR CABS(Z).LE.R1 +C----------------------------------------------------------------------- + FC = 1.0E0 + SMU = CLOG(RZ) + FMU = SMU*CMPLX(DNU,0.0E0) + CALL CSHCH(FMU, CSH, CCH) + IF (DNU.EQ.0.0E0) GO TO 10 + FC = DNU*PI + FC = FC/SIN(FC) + SMU = CSH*CMPLX(1.0E0/DNU,0.0E0) + 10 CONTINUE + A2 = 1.0E0 + DNU +C----------------------------------------------------------------------- +C GAM(1-Z)*GAM(1+Z)=PI*Z/SIN(PI*Z), T1=1/GAM(1-DNU), T2=1/GAM(1+DNU) 
+C----------------------------------------------------------------------- + T2 = EXP(-GAMLN(A2,IDUM)) + T1 = 1.0E0/(T2*FC) + IF (ABS(DNU).GT.0.1E0) GO TO 40 +C----------------------------------------------------------------------- +C SERIES FOR F0 TO RESOLVE INDETERMINACY FOR SMALL ABS(DNU) +C----------------------------------------------------------------------- + AK = 1.0E0 + S = CC(1) + DO 20 K=2,8 + AK = AK*DNU2 + TM = CC(K)*AK + S = S + TM + IF (ABS(TM).LT.TOL) GO TO 30 + 20 CONTINUE + 30 G1 = -S + GO TO 50 + 40 CONTINUE + G1 = (T1-T2)/(DNU+DNU) + 50 CONTINUE + G2 = 0.5E0*(T1+T2)*FC + G1 = G1*FC + F = CMPLX(G1,0.0E0)*CCH + SMU*CMPLX(G2,0.0E0) + PT = CEXP(FMU) + P = CMPLX(0.5E0/T2,0.0E0)*PT + Q = CMPLX(0.5E0/T1,0.0E0)/PT + S1 = F + S2 = P + AK = 1.0E0 + A1 = 1.0E0 + CK = CONE + BK = 1.0E0 - DNU2 + IF (INU.GT.0 .OR. N.GT.1) GO TO 80 +C----------------------------------------------------------------------- +C GENERATE K(FNU,Z), 0.0D0 .LE. FNU .LT. 0.5D0 AND N=1 +C----------------------------------------------------------------------- + IF (CAZ.LT.TOL) GO TO 70 + CZ = Z*Z*CMPLX(0.25E0,0.0E0) + T1 = 0.25E0*CAZ*CAZ + 60 CONTINUE + F = (F*CMPLX(AK,0.0E0)+P+Q)*CMPLX(1.0E0/BK,0.0E0) + P = P*CMPLX(1.0E0/(AK-DNU),0.0E0) + Q = Q*CMPLX(1.0E0/(AK+DNU),0.0E0) + RK = 1.0E0/AK + CK = CK*CZ*CMPLX(RK,0.0) + S1 = S1 + CK*F + A1 = A1*T1*RK + BK = BK + AK + AK + 1.0E0 + AK = AK + 1.0E0 + IF (A1.GT.TOL) GO TO 60 + 70 CONTINUE + Y(1) = S1 + IF (KODED.EQ.1) RETURN + Y(1) = S1*CEXP(Z) + RETURN +C----------------------------------------------------------------------- +C GENERATE K(DNU,Z) AND K(DNU+1,Z) FOR FORWARD RECURRENCE +C----------------------------------------------------------------------- + 80 CONTINUE + IF (CAZ.LT.TOL) GO TO 100 + CZ = Z*Z*CMPLX(0.25E0,0.0E0) + T1 = 0.25E0*CAZ*CAZ + 90 CONTINUE + F = (F*CMPLX(AK,0.0E0)+P+Q)*CMPLX(1.0E0/BK,0.0E0) + P = P*CMPLX(1.0E0/(AK-DNU),0.0E0) + Q = Q*CMPLX(1.0E0/(AK+DNU),0.0E0) + RK = 1.0E0/AK + CK = CK*CZ*CMPLX(RK,0.0E0) + S1 = S1 + CK*F 
+ S2 = S2 + CK*(P-F*CMPLX(AK,0.0E0)) + A1 = A1*T1*RK + BK = BK + AK + AK + 1.0E0 + AK = AK + 1.0E0 + IF (A1.GT.TOL) GO TO 90 + 100 CONTINUE + KFLAG = 2 + BK = REAL(SMU) + A1 = FNU + 1.0E0 + AK = A1*ABS(BK) + IF (AK.GT.ALIM) KFLAG = 3 + P2 = S2*CSS(KFLAG) + S2 = P2*RZ + S1 = S1*CSS(KFLAG) + IF (KODED.EQ.1) GO TO 210 + F = CEXP(Z) + S1 = S1*F + S2 = S2*F + GO TO 210 +C----------------------------------------------------------------------- +C IFLAG=0 MEANS NO UNDERFLOW OCCURRED +C IFLAG=1 MEANS AN UNDERFLOW OCCURRED- COMPUTATION PROCEEDS WITH +C KODED=2 AND A TEST FOR ON SCALE VALUES IS MADE DURING FORWARD +C RECURSION +C----------------------------------------------------------------------- + 110 CONTINUE + COEF = CMPLX(RTHPI,0.0E0)/CSQRT(Z) + KFLAG = 2 + IF (KODED.EQ.2) GO TO 120 + IF (XX.GT.ALIM) GO TO 290 +C BLANK LINE + A1 = EXP(-XX)*REAL(CSS(KFLAG)) + PT = CMPLX(A1,0.0E0)*CMPLX(COS(YY),-SIN(YY)) + COEF = COEF*PT + 120 CONTINUE + IF (ABS(DNU).EQ.0.5E0) GO TO 300 +C----------------------------------------------------------------------- +C MILLER ALGORITHM FOR CABS(Z).GT.R1 +C----------------------------------------------------------------------- + AK = COS(PI*DNU) + AK = ABS(AK) + IF (AK.EQ.0.0E0) GO TO 300 + FHS = ABS(0.25E0-DNU2) + IF (FHS.EQ.0.0E0) GO TO 300 +C----------------------------------------------------------------------- +C COMPUTE R2=F(E). IF CABS(Z).GE.R2, USE FORWARD RECURRENCE TO +C DETERMINE THE BACKWARD INDEX K. R2=F(E) IS A STRAIGHT LINE ON +C 12.LE.E.LE.60. E IS COMPUTED FROM 2**(-E)=B**(1-I1MACH(11))= +C TOL WHERE B IS THE BASE OF THE ARITHMETIC. 
+C----------------------------------------------------------------------- + T1 = FLOAT(I1MACH(11)-1)*R1MACH(5)*3.321928094E0 + T1 = AMAX1(T1,12.0E0) + T1 = AMIN1(T1,60.0E0) + T2 = TTH*T1 - 6.0E0 + IF (XX.NE.0.0E0) GO TO 130 + T1 = HPI + GO TO 140 + 130 CONTINUE + T1 = ATAN(YY/XX) + T1 = ABS(T1) + 140 CONTINUE + IF (T2.GT.CAZ) GO TO 170 +C----------------------------------------------------------------------- +C FORWARD RECURRENCE LOOP WHEN CABS(Z).GE.R2 +C----------------------------------------------------------------------- + ETEST = AK/(PI*CAZ*TOL) + FK = 1.0E0 + IF (ETEST.LT.1.0E0) GO TO 180 + FKS = 2.0E0 + RK = CAZ + CAZ + 2.0E0 + A1 = 0.0E0 + A2 = 1.0E0 + DO 150 I=1,KMAX + AK = FHS/FKS + BK = RK/(FK+1.0E0) + TM = A2 + A2 = BK*A2 - AK*A1 + A1 = TM + RK = RK + 2.0E0 + FKS = FKS + FK + FK + 2.0E0 + FHS = FHS + FK + FK + FK = FK + 1.0E0 + TM = ABS(A2)*FK + IF (ETEST.LT.TM) GO TO 160 + 150 CONTINUE + GO TO 310 + 160 CONTINUE + FK = FK + SPI*T1*SQRT(T2/CAZ) + FHS = ABS(0.25E0-DNU2) + GO TO 180 + 170 CONTINUE +C----------------------------------------------------------------------- +C COMPUTE BACKWARD INDEX K FOR CABS(Z).LT.R2 +C----------------------------------------------------------------------- + A2 = SQRT(CAZ) + AK = FPI*AK/(TOL*SQRT(A2)) + AA = 3.0E0*T1/(1.0E0+CAZ) + BB = 14.7E0*T1/(28.0E0+CAZ) + AK = (ALOG(AK)+CAZ*COS(AA)/(1.0E0+0.008E0*CAZ))/COS(BB) + FK = 0.12125E0*AK*AK/CAZ + 1.5E0 + 180 CONTINUE + K = INT(FK) +C----------------------------------------------------------------------- +C BACKWARD RECURRENCE LOOP FOR MILLER ALGORITHM +C----------------------------------------------------------------------- + FK = FLOAT(K) + FKS = FK*FK + P1 = CZERO + P2 = CMPLX(TOL,0.0E0) + CS = P2 + DO 190 I=1,K + A1 = FKS - FK + A2 = (FKS+FK)/(A1+FHS) + RK = 2.0E0/(FK+1.0E0) + T1 = (FK+XX)*RK + T2 = YY*RK + PT = P2 + P2 = (P2*CMPLX(T1,T2)-P1)*CMPLX(A2,0.0E0) + P1 = PT + CS = CS + P2 + FKS = A1 - FK + 1.0E0 + FK = FK - 1.0E0 + 190 CONTINUE 
+C----------------------------------------------------------------------- +C COMPUTE (P2/CS)=(P2/CABS(CS))*(CONJG(CS)/CABS(CS)) FOR BETTER +C SCALING +C----------------------------------------------------------------------- + TM = CABS(CS) + PT = CMPLX(1.0E0/TM,0.0E0) + S1 = PT*P2 + CS = CONJG(CS)*PT + S1 = COEF*S1*CS + IF (INU.GT.0 .OR. N.GT.1) GO TO 200 + ZD = Z + IF(IFLAG.EQ.1) GO TO 270 + GO TO 240 + 200 CONTINUE +C----------------------------------------------------------------------- +C COMPUTE P1/P2=(P1/CABS(P2)*CONJG(P2)/CABS(P2) FOR SCALING +C----------------------------------------------------------------------- + TM = CABS(P2) + PT = CMPLX(1.0E0/TM,0.0E0) + P1 = PT*P1 + P2 = CONJG(P2)*PT + PT = P1*P2 + S2 = S1*(CONE+(CMPLX(DNU+0.5E0,0.0E0)-PT)/Z) +C----------------------------------------------------------------------- +C FORWARD RECURSION ON THE THREE TERM RECURSION RELATION WITH +C SCALING NEAR EXPONENT EXTREMES ON KFLAG=1 OR KFLAG=3 +C----------------------------------------------------------------------- + 210 CONTINUE + CK = CMPLX(DNU+1.0E0,0.0E0)*RZ + IF (N.EQ.1) INU = INU - 1 + IF (INU.GT.0) GO TO 220 + IF (N.EQ.1) S1=S2 + ZD = Z + IF(IFLAG.EQ.1) GO TO 270 + GO TO 240 + 220 CONTINUE + INUB = 1 + IF (IFLAG.EQ.1) GO TO 261 + 225 CONTINUE + P1 = CSR(KFLAG) + ASCLE = BRY(KFLAG) + DO 230 I=INUB,INU + ST = S2 + S2 = CK*S2 + S1 + S1 = ST + CK = CK + RZ + IF (KFLAG.GE.3) GO TO 230 + P2 = S2*P1 + P2R = REAL(P2) + P2I = AIMAG(P2) + P2R = ABS(P2R) + P2I = ABS(P2I) + P2M = AMAX1(P2R,P2I) + IF (P2M.LE.ASCLE) GO TO 230 + KFLAG = KFLAG + 1 + ASCLE = BRY(KFLAG) + S1 = S1*P1 + S2 = P2 + S1 = S1*CSS(KFLAG) + S2 = S2*CSS(KFLAG) + P1 = CSR(KFLAG) + 230 CONTINUE + IF (N.EQ.1) S1 = S2 + 240 CONTINUE + Y(1) = S1*CSR(KFLAG) + IF (N.EQ.1) RETURN + Y(2) = S2*CSR(KFLAG) + IF (N.EQ.2) RETURN + KK = 2 + 250 CONTINUE + KK = KK + 1 + IF (KK.GT.N) RETURN + P1 = CSR(KFLAG) + ASCLE = BRY(KFLAG) + DO 260 I=KK,N + P2 = S2 + S2 = CK*S2 + S1 + S1 = P2 + CK = CK + RZ + P2 = S2*P1 + 
Y(I) = P2 + IF (KFLAG.GE.3) GO TO 260 + P2R = REAL(P2) + P2I = AIMAG(P2) + P2R = ABS(P2R) + P2I = ABS(P2I) + P2M = AMAX1(P2R,P2I) + IF (P2M.LE.ASCLE) GO TO 260 + KFLAG = KFLAG + 1 + ASCLE = BRY(KFLAG) + S1 = S1*P1 + S2 = P2 + S1 = S1*CSS(KFLAG) + S2 = S2*CSS(KFLAG) + P1 = CSR(KFLAG) + 260 CONTINUE + RETURN +C----------------------------------------------------------------------- +C IFLAG=1 CASES, FORWARD RECURRENCE ON SCALED VALUES ON UNDERFLOW +C----------------------------------------------------------------------- + 261 CONTINUE + HELIM = 0.5E0*ELIM + ELM = EXP(-ELIM) + CELM = CMPLX(ELM,0.0) + ASCLE = BRY(1) + ZD = Z + XD = XX + YD = YY + IC = -1 + J = 2 + DO 262 I=1,INU + ST = S2 + S2 = CK*S2+S1 + S1 = ST + CK = CK+RZ + AS = CABS(S2) + ALAS = ALOG(AS) + P2R = -XD+ALAS + IF(P2R.LT.(-ELIM)) GO TO 263 + P2 = -ZD+CLOG(S2) + P2R = REAL(P2) + P2I = AIMAG(P2) + P2M = EXP(P2R)/TOL + P1 = CMPLX(P2M,0.0E0)*CMPLX(COS(P2I),SIN(P2I)) + CALL CUCHK(P1,NW,ASCLE,TOL) + IF(NW.NE.0) GO TO 263 + J=3-J + CY(J) = P1 + IF(IC.EQ.(I-1)) GO TO 264 + IC = I + GO TO 262 + 263 CONTINUE + IF(ALAS.LT.HELIM) GO TO 262 + XD = XD-ELIM + S1 = S1*CELM + S2 = S2*CELM + ZD = CMPLX(XD,YD) + 262 CONTINUE + IF(N.EQ.1) S1 = S2 + GO TO 270 + 264 CONTINUE + KFLAG = 1 + INUB = I+1 + S2 = CY(J) + J = 3 - J + S1 = CY(J) + IF(INUB.LE.INU) GO TO 225 + IF(N.EQ.1) S1 = S2 + GO TO 240 + 270 CONTINUE + Y(1) = S1 + IF (N.EQ.1) GO TO 280 + Y(2) = S2 + 280 CONTINUE + ASCLE = BRY(1) + CALL CKSCL(ZD, FNU, N, Y, NZ, RZ, ASCLE, TOL, ELIM) + INU = N - NZ + IF (INU.LE.0) RETURN + KK = NZ + 1 + S1 = Y(KK) + Y(KK) = S1*CSR(1) + IF (INU.EQ.1) RETURN + KK = NZ + 2 + S2 = Y(KK) + Y(KK) = S2*CSR(1) + IF (INU.EQ.2) RETURN + T2 = FNU + FLOAT(KK-1) + CK = CMPLX(T2,0.0E0)*RZ + KFLAG = 1 + GO TO 250 + 290 CONTINUE +C----------------------------------------------------------------------- +C SCALE BY EXP(Z), IFLAG = 1 CASES +C----------------------------------------------------------------------- + KODED = 2 + IFLAG = 1 + KFLAG = 2 + 
GO TO 120 +C----------------------------------------------------------------------- +C FNU=HALF ODD INTEGER CASE, DNU=-0.5 +C----------------------------------------------------------------------- + 300 CONTINUE + S1 = COEF + S2 = COEF + GO TO 210 + 310 CONTINUE + NZ=-2 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cbuni.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cbuni.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,158 @@ + SUBROUTINE CBUNI(Z, FNU, KODE, N, Y, NZ, NUI, NLAST, FNUL, TOL, + * ELIM, ALIM) +C***BEGIN PROLOGUE CBUNI +C***REFER TO CBESI,CBESK +C +C CBUNI COMPUTES THE I BESSEL FUNCTION FOR LARGE CABS(Z).GT. +C FNUL AND FNU+N-1.LT.FNUL. THE ORDER IS INCREASED FROM +C FNU+N-1 GREATER THAN FNUL BY ADDING NUI AND COMPUTING +C ACCORDING TO THE UNIFORM ASYMPTOTIC EXPANSION FOR I(FNU,Z) +C ON IFORM=1 AND THE EXPANSION FOR J(FNU,Z) ON IFORM=2 +C +C***ROUTINES CALLED CUNI1,CUNI2,R1MACH +C***END PROLOGUE CBUNI + COMPLEX CSCL, CSCR, CY, RZ, ST, S1, S2, Y, Z + REAL ALIM, AX, AY, DFNU, ELIM, FNU, FNUI, FNUL, GNU, TOL, XX, YY, + * ASCLE, BRY, STR, STI, STM, R1MACH + INTEGER I, IFLAG, IFORM, K, KODE, N, NL, NLAST, NUI, NW, NZ + DIMENSION Y(N), CY(2), BRY(3) + NZ = 0 + XX = REAL(Z) + YY = AIMAG(Z) + AX = ABS(XX)*1.7321E0 + AY = ABS(YY) + IFORM = 1 + IF (AY.GT.AX) IFORM = 2 + IF (NUI.EQ.0) GO TO 60 + FNUI = FLOAT(NUI) + DFNU = FNU + FLOAT(N-1) + GNU = DFNU + FNUI + IF (IFORM.EQ.2) GO TO 10 +C----------------------------------------------------------------------- +C ASYMPTOTIC EXPANSION FOR I(FNU,Z) FOR LARGE FNU APPLIED IN +C -PI/3.LE.ARG(Z).LE.PI/3 +C----------------------------------------------------------------------- + CALL CUNI1(Z, GNU, KODE, 2, CY, NW, NLAST, FNUL, TOL, ELIM, ALIM) + GO TO 20 + 10 CONTINUE +C----------------------------------------------------------------------- +C ASYMPTOTIC EXPANSION FOR J(FNU,Z*EXP(M*HPI)) FOR LARGE FNU +C APPLIED IN PI/3.LT.ABS(ARG(Z)).LE.PI/2 WHERE M=+I OR -I +C AND HPI=PI/2 
+C----------------------------------------------------------------------- + CALL CUNI2(Z, GNU, KODE, 2, CY, NW, NLAST, FNUL, TOL, ELIM, ALIM) + 20 CONTINUE + IF (NW.LT.0) GO TO 50 + IF (NW.NE.0) GO TO 90 + AY = CABS(CY(1)) +C---------------------------------------------------------------------- +C SCALE BACKWARD RECURRENCE, BRY(3) IS DEFINED BUT NEVER USED +C---------------------------------------------------------------------- + BRY(1) = 1.0E+3*R1MACH(1)/TOL + BRY(2) = 1.0E0/BRY(1) + BRY(3) = BRY(2) + IFLAG = 2 + ASCLE = BRY(2) + AX = 1.0E0 + CSCL = CMPLX(AX,0.0E0) + IF (AY.GT.BRY(1)) GO TO 21 + IFLAG = 1 + ASCLE = BRY(1) + AX = 1.0E0/TOL + CSCL = CMPLX(AX,0.0E0) + GO TO 25 + 21 CONTINUE + IF (AY.LT.BRY(2)) GO TO 25 + IFLAG = 3 + ASCLE = BRY(3) + AX = TOL + CSCL = CMPLX(AX,0.0E0) + 25 CONTINUE + AY = 1.0E0/AX + CSCR = CMPLX(AY,0.0E0) + S1 = CY(2)*CSCL + S2 = CY(1)*CSCL + RZ = CMPLX(2.0E0,0.0E0)/Z + DO 30 I=1,NUI + ST = S2 + S2 = CMPLX(DFNU+FNUI,0.0E0)*RZ*S2 + S1 + S1 = ST + FNUI = FNUI - 1.0E0 + IF (IFLAG.GE.3) GO TO 30 + ST = S2*CSCR + STR = REAL(ST) + STI = AIMAG(ST) + STR = ABS(STR) + STI = ABS(STI) + STM = AMAX1(STR,STI) + IF (STM.LE.ASCLE) GO TO 30 + IFLAG = IFLAG+1 + ASCLE = BRY(IFLAG) + S1 = S1*CSCR + S2 = ST + AX = AX*TOL + AY = 1.0E0/AX + CSCL = CMPLX(AX,0.0E0) + CSCR = CMPLX(AY,0.0E0) + S1 = S1*CSCL + S2 = S2*CSCL + 30 CONTINUE + Y(N) = S2*CSCR + IF (N.EQ.1) RETURN + NL = N - 1 + FNUI = FLOAT(NL) + K = NL + DO 40 I=1,NL + ST = S2 + S2 = CMPLX(FNU+FNUI,0.0E0)*RZ*S2 + S1 + S1 = ST + ST = S2*CSCR + Y(K) = ST + FNUI = FNUI - 1.0E0 + K = K - 1 + IF (IFLAG.GE.3) GO TO 40 + STR = REAL(ST) + STI = AIMAG(ST) + STR = ABS(STR) + STI = ABS(STI) + STM = AMAX1(STR,STI) + IF (STM.LE.ASCLE) GO TO 40 + IFLAG = IFLAG+1 + ASCLE = BRY(IFLAG) + S1 = S1*CSCR + S2 = ST + AX = AX*TOL + AY = 1.0E0/AX + CSCL = CMPLX(AX,0.0E0) + CSCR = CMPLX(AY,0.0E0) + S1 = S1*CSCL + S2 = S2*CSCL + 40 CONTINUE + RETURN + 50 CONTINUE + NZ = -1 + IF(NW.EQ.(-2)) NZ=-2 + RETURN + 60 CONTINUE + IF 
(IFORM.EQ.2) GO TO 70
+C-----------------------------------------------------------------------
+C     ASYMPTOTIC EXPANSION FOR I(FNU,Z) FOR LARGE FNU APPLIED IN
+C     -PI/3.LE.ARG(Z).LE.PI/3
+C-----------------------------------------------------------------------
+      CALL CUNI1(Z, FNU, KODE, N, Y, NW, NLAST, FNUL, TOL, ELIM, ALIM)
+      GO TO 80
+   70 CONTINUE
+C-----------------------------------------------------------------------
+C     ASYMPTOTIC EXPANSION FOR J(FNU,Z*EXP(M*HPI)) FOR LARGE FNU
+C     APPLIED IN PI/3.LT.ABS(ARG(Z)).LE.PI/2 WHERE M=+I OR -I
+C     AND HPI=PI/2
+C-----------------------------------------------------------------------
+      CALL CUNI2(Z, FNU, KODE, N, Y, NW, NLAST, FNUL, TOL, ELIM, ALIM)
+   80 CONTINUE
+      IF (NW.LT.0) GO TO 50
+      NZ = NW
+      RETURN
+   90 CONTINUE
+      NLAST = N
+      RETURN
+      END
diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cbunk.f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcruft/amos/cbunk.f	Sun Apr 27 22:34:17 2008 +0200
@@ -0,0 +1,36 @@
+      SUBROUTINE CBUNK(Z, FNU, KODE, MR, N, Y, NZ, TOL, ELIM, ALIM)
+C***BEGIN PROLOGUE CBUNK
+C***REFER TO CBESK,CBESH
+C
+C     CBUNK COMPUTES THE K BESSEL FUNCTION FOR LARGE ORDER FNU.GT.FNUL
+C     BY THE UNIFORM ASYMPTOTIC EXPANSION FOR K(FNU,Z) IN CUNK1 (NEAR
+C     THE REAL AXIS) OR THE EXPANSION FOR H(2,FNU,Z) IN CUNK2
+C
+C***ROUTINES CALLED CUNK1,CUNK2
+C***END PROLOGUE CBUNK
+      COMPLEX Y, Z
+      REAL ALIM, AX, AY, ELIM, FNU, TOL, XX, YY
+      INTEGER KODE, MR, N, NZ
+      DIMENSION Y(N)
+      NZ = 0
+      XX = REAL(Z)
+      YY = AIMAG(Z)
+      AX = ABS(XX)*1.7321E0
+      AY = ABS(YY)
+      IF (AY.GT.AX) GO TO 10
+C-----------------------------------------------------------------------
+C     ASYMPTOTIC EXPANSION FOR K(FNU,Z) FOR LARGE FNU APPLIED IN
+C     -PI/3.LE.ARG(Z).LE.PI/3 (AY.LE.AX WITH AX=SQRT(3)*ABS(XX))
+C-----------------------------------------------------------------------
+      CALL CUNK1(Z, FNU, KODE, MR, N, Y, NZ, TOL, ELIM, ALIM)
+      GO TO 20
+   10 CONTINUE
+C-----------------------------------------------------------------------
+C     ASYMPTOTIC EXPANSION FOR H(2,FNU,Z*EXP(M*HPI)) FOR LARGE FNU
+C     APPLIED IN PI/3.LT.ABS(ARG(Z)).LE.PI/2 WHERE M=+I OR -I
+C     AND HPI=PI/2
+C-----------------------------------------------------------------------
+      CALL CUNK2(Z, FNU, KODE, MR, N, Y, NZ, TOL, ELIM, ALIM)
+   20 CONTINUE
+      RETURN
+      END
diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/ckscl.f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcruft/amos/ckscl.f	Sun Apr 27 22:34:17 2008 +0200
@@ -0,0 +1,102 @@
+      SUBROUTINE CKSCL(ZR, FNU, N, Y, NZ, RZ, ASCLE, TOL, ELIM)
+C***BEGIN PROLOGUE CKSCL
+C***REFER TO CBKNU,CUNK1,CUNK2
+C
+C     SET K FUNCTIONS TO ZERO ON UNDERFLOW, CONTINUE RECURRENCE
+C     ON SCALED FUNCTIONS UNTIL TWO MEMBERS COME ON SCALE, THEN
+C     RETURN WITH MIN(NZ+2,N) VALUES SCALED BY 1/TOL.
+C +C***ROUTINES CALLED CUCHK +C***END PROLOGUE CKSCL + COMPLEX CK, CS, CY, CZERO, RZ, S1, S2, Y, ZR, ZD, CELM + REAL AA, ASCLE, ACS, AS, CSI, CSR, ELIM, FN, FNU, TOL, XX, ZRI, + * ELM, ALAS, HELIM + INTEGER I, IC, K, KK, N, NN, NW, NZ + DIMENSION Y(N), CY(2) + DATA CZERO / (0.0E0,0.0E0) / +C + NZ = 0 + IC = 0 + XX = REAL(ZR) + NN = MIN0(2,N) + DO 10 I=1,NN + S1 = Y(I) + CY(I) = S1 + AS = CABS(S1) + ACS = -XX + ALOG(AS) + NZ = NZ + 1 + Y(I) = CZERO + IF (ACS.LT.(-ELIM)) GO TO 10 + CS = -ZR + CLOG(S1) + CSR = REAL(CS) + CSI = AIMAG(CS) + AA = EXP(CSR)/TOL + CS = CMPLX(AA,0.0E0)*CMPLX(COS(CSI),SIN(CSI)) + CALL CUCHK(CS, NW, ASCLE, TOL) + IF (NW.NE.0) GO TO 10 + Y(I) = CS + NZ = NZ - 1 + IC = I + 10 CONTINUE + IF (N.EQ.1) RETURN + IF (IC.GT.1) GO TO 20 + Y(1) = CZERO + NZ = 2 + 20 CONTINUE + IF (N.EQ.2) RETURN + IF (NZ.EQ.0) RETURN + FN = FNU + 1.0E0 + CK = CMPLX(FN,0.0E0)*RZ + S1 = CY(1) + S2 = CY(2) + HELIM = 0.5E0*ELIM + ELM = EXP(-ELIM) + CELM = CMPLX(ELM,0.0E0) + ZRI =AIMAG(ZR) + ZD = ZR +C +C FIND TWO CONSECUTIVE Y VALUES ON SCALE. 
SCALE RECURRENCE IF +C S2 GETS LARGER THAN EXP(ELIM/2) +C + DO 30 I=3,N + KK = I + CS = S2 + S2 = CK*S2 + S1 + S1 = CS + CK = CK + RZ + AS = CABS(S2) + ALAS = ALOG(AS) + ACS = -XX + ALAS + NZ = NZ + 1 + Y(I) = CZERO + IF (ACS.LT.(-ELIM)) GO TO 25 + CS = -ZD + CLOG(S2) + CSR = REAL(CS) + CSI = AIMAG(CS) + AA = EXP(CSR)/TOL + CS = CMPLX(AA,0.0E0)*CMPLX(COS(CSI),SIN(CSI)) + CALL CUCHK(CS, NW, ASCLE, TOL) + IF (NW.NE.0) GO TO 25 + Y(I) = CS + NZ = NZ - 1 + IF (IC.EQ.(KK-1)) GO TO 40 + IC = KK + GO TO 30 + 25 CONTINUE + IF(ALAS.LT.HELIM) GO TO 30 + XX = XX-ELIM + S1 = S1*CELM + S2 = S2*CELM + ZD = CMPLX(XX,ZRI) + 30 CONTINUE + NZ = N + IF(IC.EQ.N) NZ=N-1 + GO TO 45 + 40 CONTINUE + NZ = KK - 2 + 45 CONTINUE + DO 50 K=1,NZ + Y(K) = CZERO + 50 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cmlri.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cmlri.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,155 @@ + SUBROUTINE CMLRI(Z, FNU, KODE, N, Y, NZ, TOL) +C***BEGIN PROLOGUE CMLRI +C***REFER TO CBESI,CBESK +C +C CMLRI COMPUTES THE I BESSEL FUNCTION FOR RE(Z).GE.0.0 BY THE +C MILLER ALGORITHM NORMALIZED BY A NEUMANN SERIES. 
+C +C***ROUTINES CALLED GAMLN,R1MACH +C***END PROLOGUE CMLRI + COMPLEX CK, CNORM, CONE, CTWO, CZERO, PT, P1, P2, RZ, SUM, Y, Z + REAL ACK, AK, AP, AT, AZ, BK, FKAP, FKK, FLAM, FNF, FNU, RHO, + * RHO2, SCLE, TFNF, TOL, TST, X, GAMLN, R1MACH + INTEGER I, IAZ, IDUM, IFNU, INU, ITIME, K, KK, KM, KODE, M, N + DIMENSION Y(N) + DATA CZERO,CONE,CTWO /(0.0E0,0.0E0),(1.0E0,0.0E0),(2.0E0,0.0E0)/ + SCLE = 1.0E+3*R1MACH(1)/TOL + NZ=0 + AZ = CABS(Z) + X = REAL(Z) + IAZ = INT(AZ) + IFNU = INT(FNU) + INU = IFNU + N - 1 + AT = FLOAT(IAZ) + 1.0E0 + CK = CMPLX(AT,0.0E0)/Z + RZ = CTWO/Z + P1 = CZERO + P2 = CONE + ACK = (AT+1.0E0)/AZ + RHO = ACK + SQRT(ACK*ACK-1.0E0) + RHO2 = RHO*RHO + TST = (RHO2+RHO2)/((RHO2-1.0E0)*(RHO-1.0E0)) + TST = TST/TOL +C----------------------------------------------------------------------- +C COMPUTE RELATIVE TRUNCATION ERROR INDEX FOR SERIES +C----------------------------------------------------------------------- + AK = AT + DO 10 I=1,80 + PT = P2 + P2 = P1 - CK*P2 + P1 = PT + CK = CK + RZ + AP = CABS(P2) + IF (AP.GT.TST*AK*AK) GO TO 20 + AK = AK + 1.0E0 + 10 CONTINUE + GO TO 110 + 20 CONTINUE + I = I + 1 + K = 0 + IF (INU.LT.IAZ) GO TO 40 +C----------------------------------------------------------------------- +C COMPUTE RELATIVE TRUNCATION ERROR FOR RATIOS +C----------------------------------------------------------------------- + P1 = CZERO + P2 = CONE + AT = FLOAT(INU) + 1.0E0 + CK = CMPLX(AT,0.0E0)/Z + ACK = AT/AZ + TST = SQRT(ACK/TOL) + ITIME = 1 + DO 30 K=1,80 + PT = P2 + P2 = P1 - CK*P2 + P1 = PT + CK = CK + RZ + AP = CABS(P2) + IF (AP.LT.TST) GO TO 30 + IF (ITIME.EQ.2) GO TO 40 + ACK = CABS(CK) + FLAM = ACK + SQRT(ACK*ACK-1.0E0) + FKAP = AP/CABS(P1) + RHO = AMIN1(FLAM,FKAP) + TST = TST*SQRT(RHO/(RHO*RHO-1.0E0)) + ITIME = 2 + 30 CONTINUE + GO TO 110 + 40 CONTINUE +C----------------------------------------------------------------------- +C BACKWARD RECURRENCE AND SUM NORMALIZING RELATION 
+C----------------------------------------------------------------------- + K = K + 1 + KK = MAX0(I+IAZ,K+INU) + FKK = FLOAT(KK) + P1 = CZERO +C----------------------------------------------------------------------- +C SCALE P2 AND SUM BY SCLE +C----------------------------------------------------------------------- + P2 = CMPLX(SCLE,0.0E0) + FNF = FNU - FLOAT(IFNU) + TFNF = FNF + FNF + BK = GAMLN(FKK+TFNF+1.0E0,IDUM) - GAMLN(FKK+1.0E0,IDUM) + * -GAMLN(TFNF+1.0E0,IDUM) + BK = EXP(BK) + SUM = CZERO + KM = KK - INU + DO 50 I=1,KM + PT = P2 + P2 = P1 + CMPLX(FKK+FNF,0.0E0)*RZ*P2 + P1 = PT + AK = 1.0E0 - TFNF/(FKK+TFNF) + ACK = BK*AK + SUM = SUM + CMPLX(ACK+BK,0.0E0)*P1 + BK = ACK + FKK = FKK - 1.0E0 + 50 CONTINUE + Y(N) = P2 + IF (N.EQ.1) GO TO 70 + DO 60 I=2,N + PT = P2 + P2 = P1 + CMPLX(FKK+FNF,0.0E0)*RZ*P2 + P1 = PT + AK = 1.0E0 - TFNF/(FKK+TFNF) + ACK = BK*AK + SUM = SUM + CMPLX(ACK+BK,0.0E0)*P1 + BK = ACK + FKK = FKK - 1.0E0 + M = N - I + 1 + Y(M) = P2 + 60 CONTINUE + 70 CONTINUE + IF (IFNU.LE.0) GO TO 90 + DO 80 I=1,IFNU + PT = P2 + P2 = P1 + CMPLX(FKK+FNF,0.0E0)*RZ*P2 + P1 = PT + AK = 1.0E0 - TFNF/(FKK+TFNF) + ACK = BK*AK + SUM = SUM + CMPLX(ACK+BK,0.0E0)*P1 + BK = ACK + FKK = FKK - 1.0E0 + 80 CONTINUE + 90 CONTINUE + PT = Z + IF (KODE.EQ.2) PT = PT - CMPLX(X,0.0E0) + P1 = -CMPLX(FNF,0.0E0)*CLOG(RZ) + PT + AP = GAMLN(1.0E0+FNF,IDUM) + PT = P1 - CMPLX(AP,0.0E0) +C----------------------------------------------------------------------- +C THE DIVISION CEXP(PT)/(SUM+P2) IS ALTERED TO AVOID OVERFLOW +C IN THE DENOMINATOR BY SQUARING LARGE QUANTITIES +C----------------------------------------------------------------------- + P2 = P2 + SUM + AP = CABS(P2) + P1 = CMPLX(1.0E0/AP,0.0E0) + CK = CEXP(PT)*P1 + PT = CONJG(P2)*P1 + CNORM = CK*PT + DO 100 I=1,N + Y(I) = Y(I)*CNORM + 100 CONTINUE + RETURN + 110 CONTINUE + NZ=-2 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/crati.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/crati.f Sun 
Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,100 @@ + SUBROUTINE CRATI(Z, FNU, N, CY, TOL) +C***BEGIN PROLOGUE CRATI +C***REFER TO CBESI,CBESK,CBESH +C +C CRATI COMPUTES RATIOS OF I BESSEL FUNCTIONS BY BACKWARD +C RECURRENCE. THE STARTING INDEX IS DETERMINED BY FORWARD +C RECURRENCE AS DESCRIBED IN J. RES. OF NAT. BUR. OF STANDARDS-B, +C MATHEMATICAL SCIENCES, VOL 77B, P111-114, SEPTEMBER, 1973, +C BESSEL FUNCTIONS I AND J OF COMPLEX ARGUMENT AND INTEGER ORDER, +C BY D. J. SOOKNE. +C +C***ROUTINES CALLED (NONE) +C***END PROLOGUE CRATI + COMPLEX CDFNU, CONE, CY, CZERO, PT, P1, P2, RZ, T1, Z + REAL AK, AMAGZ, AP1, AP2, ARG, AZ, DFNU, FDNU, FLAM, FNU, FNUP, + * RAP1, RHO, TEST, TEST1, TOL + INTEGER I, ID, IDNU, INU, ITIME, K, KK, MAGZ, N + DIMENSION CY(N) + DATA CZERO, CONE / (0.0E0,0.0E0), (1.0E0,0.0E0) / + AZ = CABS(Z) + INU = INT(FNU) + IDNU = INU + N - 1 + FDNU = FLOAT(IDNU) + MAGZ = INT(AZ) + AMAGZ = FLOAT(MAGZ+1) + FNUP = AMAX1(AMAGZ,FDNU) + ID = IDNU - MAGZ - 1 + ITIME = 1 + K = 1 + RZ = (CONE+CONE)/Z + T1 = CMPLX(FNUP,0.0E0)*RZ + P2 = -T1 + P1 = CONE + T1 = T1 + RZ + IF (ID.GT.0) ID = 0 + AP2 = CABS(P2) + AP1 = CABS(P1) +C----------------------------------------------------------------------- +C THE OVERFLOW TEST ON K(FNU+I-1,Z) BEFORE THE CALL TO CBKNX +C GUARANTEES THAT P2 IS ON SCALE. SCALE TEST1 AND ALL SUBSEQUENT +C P2 VALUES BY AP1 TO ENSURE THAT AN OVERFLOW DOES NOT OCCUR +C PREMATURELY. 
+C-----------------------------------------------------------------------
+      ARG = (AP2+AP2)/(AP1*TOL)
+      TEST1 = SQRT(ARG)
+      TEST = TEST1
+      RAP1 = 1.0E0/AP1
+      P1 = P1*CMPLX(RAP1,0.0E0)
+      P2 = P2*CMPLX(RAP1,0.0E0)
+      AP2 = AP2*RAP1
+   10 CONTINUE
+      K = K + 1
+      AP1 = AP2
+      PT = P2
+      P2 = P1 - T1*P2
+      P1 = PT
+      T1 = T1 + RZ
+      AP2 = CABS(P2)
+      IF (AP1.LE.TEST) GO TO 10
+      IF (ITIME.EQ.2) GO TO 20
+      AK = CABS(T1)*0.5E0
+      FLAM = AK + SQRT(AK*AK-1.0E0)
+      RHO = AMIN1(AP2/AP1,FLAM)
+      TEST = TEST1*SQRT(RHO/(RHO*RHO-1.0E0))
+      ITIME = 2
+      GO TO 10
+   20 CONTINUE
+      KK = K + 1 - ID
+      AK = FLOAT(KK)
+      DFNU = FNU + FLOAT(N-1)
+      CDFNU = CMPLX(DFNU,0.0E0)
+      T1 = CMPLX(AK,0.0E0)
+      P1 = CMPLX(1.0E0/AP2,0.0E0)
+      P2 = CZERO
+      DO 30 I=1,KK
+      PT = P1
+      P1 = RZ*(CDFNU+T1)*P1 + P2
+      P2 = PT
+      T1 = T1 - CONE
+   30 CONTINUE
+      IF (REAL(P1).NE.0.0E0 .OR. AIMAG(P1).NE.0.0E0) GO TO 40
+      P1 = CMPLX(TOL,TOL)
+   40 CONTINUE
+      CY(N) = P2/P1
+      IF (N.EQ.1) RETURN
+      K = N - 1
+      AK = FLOAT(K)
+      T1 = CMPLX(AK,0.0E0)
+      CDFNU = CMPLX(FNU,0.0E0)*RZ
+      DO 60 I=2,N
+      PT = CDFNU + T1*RZ + CY(K+1)
+      IF (REAL(PT).NE.0.0E0 .OR. AIMAG(PT).NE.0.0E0) GO TO 50
+      PT = CMPLX(TOL,TOL)
+   50 CONTINUE
+      CY(K) = CONE/PT
+      T1 = T1 - CONE
+      K = K - 1
+   60 CONTINUE
+      RETURN
+      END
diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cs1s2.f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcruft/amos/cs1s2.f	Sun Apr 27 22:34:17 2008 +0200
@@ -0,0 +1,44 @@
+      SUBROUTINE CS1S2(ZR, S1, S2, NZ, ASCLE, ALIM, IUF)
+C***BEGIN PROLOGUE CS1S2
+C***REFER TO CBESK,CAIRY
+C
+C     CS1S2 TESTS FOR A POSSIBLE UNDERFLOW RESULTING FROM THE
+C     ADDITION OF THE I AND K FUNCTIONS IN THE ANALYTIC CON-
+C     TINUATION FORMULA WHERE S1=K FUNCTION AND S2=I FUNCTION.
+C     ON KODE=1 THE I AND K FUNCTIONS ARE DIFFERENT ORDERS OF
+C     MAGNITUDE, BUT FOR KODE=2 THEY CAN BE OF THE SAME ORDER
+C     OF MAGNITUDE AND THE MAXIMUM OF AS1,AS2 MUST BE AT LEAST ONE
+C     PRECISION ABOVE THE UNDERFLOW LIMIT ASCLE, ELSE BOTH ARE ZEROED.
+C
+C***ROUTINES CALLED (NONE)
+C***END PROLOGUE CS1S2
+      COMPLEX CZERO, C1, S1, S1D, S2, ZR
+      REAL AA, ALIM, ALN, ASCLE, AS1, AS2, XX
+      INTEGER IUF, NZ
+      DATA CZERO / (0.0E0,0.0E0) /
+      NZ = 0
+      AS1 = CABS(S1)
+      AS2 = CABS(S2)
+      AA = REAL(S1)
+      ALN = AIMAG(S1)
+      IF (AA.EQ.0.0E0 .AND. ALN.EQ.0.0E0) GO TO 10
+      IF (AS1.EQ.0.0E0) GO TO 10
+      XX = REAL(ZR)
+      ALN = -XX - XX + ALOG(AS1)
+      S1D = S1
+      S1 = CZERO
+      AS1 = 0.0E0
+      IF (ALN.LT.(-ALIM)) GO TO 10
+      C1 = CLOG(S1D) - ZR - ZR
+      S1 = CEXP(C1)
+      AS1 = CABS(S1)
+      IUF = IUF + 1
+   10 CONTINUE
+      AA = AMAX1(AS1,AS2)
+      IF (AA.GT.ASCLE) RETURN
+      S1 = CZERO
+      S2 = CZERO
+      NZ = 1
+      IUF = 0
+      RETURN
+      END
diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cseri.f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcruft/amos/cseri.f	Sun Apr 27 22:34:17 2008 +0200
@@ -0,0 +1,154 @@
+      SUBROUTINE CSERI(Z, FNU, KODE, N, Y, NZ, TOL, ELIM, ALIM)
+C***BEGIN PROLOGUE CSERI
+C***REFER TO CBESI,CBESK
+C
+C     CSERI COMPUTES THE I BESSEL FUNCTION FOR REAL(Z).GE.0.0 BY
+C     MEANS OF THE POWER SERIES FOR LARGE CABS(Z) IN THE
+C     REGION CABS(Z).LE.2*SQRT(FNU+1). NZ=0 IS A NORMAL RETURN.
+C     NZ.GT.0 MEANS THAT THE LAST NZ COMPONENTS WERE SET TO ZERO
+C     DUE TO UNDERFLOW. NZ.LT.0 MEANS UNDERFLOW OCCURRED, BUT THE
+C     CONDITION CABS(Z).LE.2*SQRT(FNU+1) WAS VIOLATED AND THE
+C     COMPUTATION MUST BE COMPLETED IN ANOTHER ROUTINE WITH N=N-ABS(NZ).
+C +C***ROUTINES CALLED CUCHK,GAMLN,R1MACH +C***END PROLOGUE CSERI + COMPLEX AK1, CK, COEF, CONE, CRSC, CZ, CZERO, HZ, RZ, S1, S2, W, + * Y, Z + REAL AA, ACZ, AK, ALIM, ARM, ASCLE, ATOL, AZ, DFNU, ELIM, FNU, + * FNUP, RAK1, RS, RTR1, S, SS, TOL, X, GAMLN, R1MACH + INTEGER I, IB, IDUM, IFLAG, IL, K, KODE, L, M, N, NN, NW, NZ + DIMENSION Y(N), W(2) + DATA CZERO, CONE / (0.0E0,0.0E0), (1.0E0,0.0E0) / +C + NZ = 0 + AZ = CABS(Z) + IF (AZ.EQ.0.0E0) GO TO 150 + X = REAL(Z) + ARM = 1.0E+3*R1MACH(1) + RTR1 = SQRT(ARM) + CRSC = CMPLX(1.0E0,0.0E0) + IFLAG = 0 + IF (AZ.LT.ARM) GO TO 140 + HZ = Z*CMPLX(0.5E0,0.0E0) + CZ = CZERO + IF (AZ.GT.RTR1) CZ = HZ*HZ + ACZ = CABS(CZ) + NN = N + CK = CLOG(HZ) + 10 CONTINUE + DFNU = FNU + FLOAT(NN-1) + FNUP = DFNU + 1.0E0 +C----------------------------------------------------------------------- +C UNDERFLOW TEST +C----------------------------------------------------------------------- + AK1 = CK*CMPLX(DFNU,0.0E0) + AK = GAMLN(FNUP,IDUM) + AK1 = AK1 - CMPLX(AK,0.0E0) + IF (KODE.EQ.2) AK1 = AK1 - CMPLX(X,0.0E0) + RAK1 = REAL(AK1) + IF (RAK1.GT.(-ELIM)) GO TO 30 + 20 CONTINUE + NZ = NZ + 1 + Y(NN) = CZERO + IF (ACZ.GT.DFNU) GO TO 170 + NN = NN - 1 + IF (NN.EQ.0) RETURN + GO TO 10 + 30 CONTINUE + IF (RAK1.GT.(-ALIM)) GO TO 40 + IFLAG = 1 + SS = 1.0E0/TOL + CRSC = CMPLX(TOL,0.0E0) + ASCLE = ARM*SS + 40 CONTINUE + AK = AIMAG(AK1) + AA = EXP(RAK1) + IF (IFLAG.EQ.1) AA = AA*SS + COEF = CMPLX(AA,0.0E0)*CMPLX(COS(AK),SIN(AK)) + ATOL = TOL*ACZ/FNUP + IL = MIN0(2,NN) + DO 80 I=1,IL + DFNU = FNU + FLOAT(NN-I) + FNUP = DFNU + 1.0E0 + S1 = CONE + IF (ACZ.LT.TOL*FNUP) GO TO 60 + AK1 = CONE + AK = FNUP + 2.0E0 + S = FNUP + AA = 2.0E0 + 50 CONTINUE + RS = 1.0E0/S + AK1 = AK1*CZ*CMPLX(RS,0.0E0) + S1 = S1 + AK1 + S = S + AK + AK = AK + 2.0E0 + AA = AA*ACZ*RS + IF (AA.GT.ATOL) GO TO 50 + 60 CONTINUE + M = NN - I + 1 + S2 = S1*COEF + W(I) = S2 + IF (IFLAG.EQ.0) GO TO 70 + CALL CUCHK(S2, NW, ASCLE, TOL) + IF (NW.NE.0) GO TO 20 + 70 CONTINUE + Y(M) = S2*CRSC + IF 
(I.NE.IL) COEF = COEF*CMPLX(DFNU,0.0E0)/HZ + 80 CONTINUE + IF (NN.LE.2) RETURN + K = NN - 2 + AK = FLOAT(K) + RZ = (CONE+CONE)/Z + IF (IFLAG.EQ.1) GO TO 110 + IB = 3 + 90 CONTINUE + DO 100 I=IB,NN + Y(K) = CMPLX(AK+FNU,0.0E0)*RZ*Y(K+1) + Y(K+2) + AK = AK - 1.0E0 + K = K - 1 + 100 CONTINUE + RETURN +C----------------------------------------------------------------------- +C RECUR BACKWARD WITH SCALED VALUES +C----------------------------------------------------------------------- + 110 CONTINUE +C----------------------------------------------------------------------- +C EXP(-ALIM)=EXP(-ELIM)/TOL=APPROX. ONE PRECISION ABOVE THE +C UNDERFLOW LIMIT = ASCLE = R1MACH(1)*CSCL*1.0E+3 +C----------------------------------------------------------------------- + S1 = W(1) + S2 = W(2) + DO 120 L=3,NN + CK = S2 + S2 = S1 + CMPLX(AK+FNU,0.0E0)*RZ*S2 + S1 = CK + CK = S2*CRSC + Y(K) = CK + AK = AK - 1.0E0 + K = K - 1 + IF (CABS(CK).GT.ASCLE) GO TO 130 + 120 CONTINUE + RETURN + 130 CONTINUE + IB = L + 1 + IF (IB.GT.NN) RETURN + GO TO 90 + 140 CONTINUE + NZ = N + IF (FNU.EQ.0.0E0) NZ = NZ - 1 + 150 CONTINUE + Y(1) = CZERO + IF (FNU.EQ.0.0E0) Y(1) = CONE + IF (N.EQ.1) RETURN + DO 160 I=2,N + Y(I) = CZERO + 160 CONTINUE + RETURN +C----------------------------------------------------------------------- +C RETURN WITH NZ.LT.0 IF CABS(Z*Z/4).GT.FNU+N-NZ-1 COMPLETE +C THE CALCULATION IN CBINU WITH N=N-IABS(NZ) +C----------------------------------------------------------------------- + 170 CONTINUE + NZ = -NZ + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cshch.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cshch.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,25 @@ + SUBROUTINE CSHCH(Z, CSH, CCH) +C***BEGIN PROLOGUE CSHCH +C***REFER TO CBESK,CBESH +C +C CSHCH COMPUTES THE COMPLEX HYPERBOLIC FUNCTIONS CSH=SINH(X+I*Y) +C AND CCH=COSH(X+I*Y), WHERE I**2=-1. 
+C +C***ROUTINES CALLED (NONE) +C***END PROLOGUE CSHCH + COMPLEX CCH, CSH, Z + REAL CCHI, CCHR, CH, CN, CSHI, CSHR, SH, SN, X, Y, COSH, SINH + X = REAL(Z) + Y = AIMAG(Z) + SH = SINH(X) + CH = COSH(X) + SN = SIN(Y) + CN = COS(Y) + CSHR = SH*CN + CSHI = CH*SN + CSH = CMPLX(CSHR,CSHI) + CCHR = CH*CN + CCHI = SH*SN + CCH = CMPLX(CCHR,CCHI) + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cuchk.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cuchk.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,30 @@ + SUBROUTINE CUCHK(Y, NZ, ASCLE, TOL) +C***BEGIN PROLOGUE CUCHK +C***REFER TO CSERI,CUOIK,CUNK1,CUNK2,CUNI1,CUNI2,CKSCL +C +C Y ENTERS AS A SCALED QUANTITY WHOSE MAGNITUDE IS GREATER THAN +C EXP(-ALIM)=ASCLE=1.0E+3*R1MACH(1)/TOL. THE TEST IS MADE TO SEE +C IF THE MAGNITUDE OF THE REAL OR IMAGINARY PART WOULD UNDER FLOW +C WHEN Y IS SCALED (BY TOL) TO ITS PROPER VALUE. Y IS ACCEPTED +C IF THE UNDERFLOW IS AT LEAST ONE PRECISION BELOW THE MAGNITUDE +C OF THE LARGEST COMPONENT; OTHERWISE THE PHASE ANGLE DOES NOT HAVE +C ABSOLUTE ACCURACY AND AN UNDERFLOW IS ASSUMED. +C +C***ROUTINES CALLED (NONE) +C***END PROLOGUE CUCHK +C + COMPLEX Y + REAL ASCLE, SS, ST, TOL, YR, YI + INTEGER NZ + NZ = 0 + YR = REAL(Y) + YI = AIMAG(Y) + YR = ABS(YR) + YI = ABS(YI) + ST = AMIN1(YR,YI) + IF (ST.GT.ASCLE) RETURN + SS = AMAX1(YR,YI) + ST=ST/TOL + IF (SS.LT.ST) NZ = 1 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cunhj.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cunhj.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,648 @@ + SUBROUTINE CUNHJ(Z, FNU, IPMTR, TOL, PHI, ARG, ZETA1, ZETA2, + * ASUM, BSUM) +C***BEGIN PROLOGUE CUNHJ +C***REFER TO CBESI,CBESK +C +C REFERENCES +C HANDBOOK OF MATHEMATICAL FUNCTIONS BY M. ABRAMOWITZ AND I.A. +C STEGUN, AMS55, NATIONAL BUREAU OF STANDARDS, 1965, CHAPTER 9. +C +C ASYMPTOTICS AND SPECIAL FUNCTIONS BY F.W.J. 
OLVER, ACADEMIC +C PRESS, N.Y., 1974, PAGE 420 +C +C ABSTRACT +C CUNHJ COMPUTES PARAMETERS FOR BESSEL FUNCTIONS C(FNU,Z) = +C J(FNU,Z), Y(FNU,Z) OR H(I,FNU,Z) I=1,2 FOR LARGE ORDERS FNU +C BY MEANS OF THE UNIFORM ASYMPTOTIC EXPANSION +C +C C(FNU,Z)=C1*PHI*( ASUM*AIRY(ARG) + C2*BSUM*DAIRY(ARG) ) +C +C FOR PROPER CHOICES OF C1, C2, AIRY AND DAIRY WHERE AIRY IS +C AN AIRY FUNCTION AND DAIRY IS ITS DERIVATIVE. +C +C (2/3)*FNU*ZETA**1.5 = ZETA1-ZETA2, +C +C ZETA1=0.5*FNU*CLOG((1+W)/(1-W)), ZETA2=FNU*W FOR SCALING +C PURPOSES IN AIRY FUNCTIONS FROM CAIRY OR CBIRY. +C +C MCONJ=SIGN OF AIMAG(Z), BUT IS AMBIGUOUS WHEN Z IS REAL AND +C MUST BE SPECIFIED. IPMTR=0 RETURNS ALL PARAMETERS. IPMTR= +C 1 COMPUTES ALL EXCEPT ASUM AND BSUM. +C +C***ROUTINES CALLED (NONE) +C***END PROLOGUE CUNHJ + COMPLEX ARG, ASUM, BSUM, CFNU, CONE, CR, CZERO, DR, P, PHI, + * PRZTH, PTFN, RFN13, RTZTA, RZTH, SUMA, SUMB, TFN, T2, UP, W, W2, + * Z, ZA, ZB, ZC, ZETA, ZETA1, ZETA2, ZTH + REAL ALFA, ANG, AP, AR, ATOL, AW2, AZTH, BETA, BR, BTOL, C, EX1, + * EX2, FNU, FN13, FN23, GAMA, HPI, PI, PP, RFNU, RFNU2, THPI, TOL, + * WI, WR, ZCI, ZCR, ZETAI, ZETAR, ZTHI, ZTHR, ASUMR, ASUMI, BSUMR, + * BSUMI, TEST, TSTR, TSTI, AC + INTEGER IAS, IBS, IPMTR, IS, J, JR, JU, K, KMAX, KP1, KS, L, LR, + * LRP1, L1, L2, M + DIMENSION AR(14), BR(14), C(105), ALFA(180), BETA(210), GAMA(30), + * AP(30), P(30), UP(14), CR(14), DR(14) + DATA AR(1), AR(2), AR(3), AR(4), AR(5), AR(6), AR(7), AR(8), + 1 AR(9), AR(10), AR(11), AR(12), AR(13), AR(14)/ + 2 1.00000000000000000E+00, 1.04166666666666667E-01, + 3 8.35503472222222222E-02, 1.28226574556327160E-01, + 4 2.91849026464140464E-01, 8.81627267443757652E-01, + 5 3.32140828186276754E+00, 1.49957629868625547E+01, + 6 7.89230130115865181E+01, 4.74451538868264323E+02, + 7 3.20749009089066193E+03, 2.40865496408740049E+04, + 8 1.98923119169509794E+05, 1.79190200777534383E+06/ + DATA BR(1), BR(2), BR(3), BR(4), BR(5), BR(6), BR(7), BR(8), + 1 BR(9), BR(10), BR(11), BR(12), BR(13), 
BR(14)/ + 2 1.00000000000000000E+00, -1.45833333333333333E-01, + 3 -9.87413194444444444E-02, -1.43312053915895062E-01, + 4 -3.17227202678413548E-01, -9.42429147957120249E-01, + 5 -3.51120304082635426E+00, -1.57272636203680451E+01, + 6 -8.22814390971859444E+01, -4.92355370523670524E+02, + 7 -3.31621856854797251E+03, -2.48276742452085896E+04, + 8 -2.04526587315129788E+05, -1.83844491706820990E+06/ + DATA C(1), C(2), C(3), C(4), C(5), C(6), C(7), C(8), C(9), C(10), + 1 C(11), C(12), C(13), C(14), C(15), C(16), C(17), C(18), + 2 C(19), C(20), C(21), C(22), C(23), C(24)/ + 3 1.00000000000000000E+00, -2.08333333333333333E-01, + 4 1.25000000000000000E-01, 3.34201388888888889E-01, + 5 -4.01041666666666667E-01, 7.03125000000000000E-02, + 6 -1.02581259645061728E+00, 1.84646267361111111E+00, + 7 -8.91210937500000000E-01, 7.32421875000000000E-02, + 8 4.66958442342624743E+00, -1.12070026162229938E+01, + 9 8.78912353515625000E+00, -2.36408691406250000E+00, + A 1.12152099609375000E-01, -2.82120725582002449E+01, + B 8.46362176746007346E+01, -9.18182415432400174E+01, + C 4.25349987453884549E+01, -7.36879435947963170E+00, + D 2.27108001708984375E-01, 2.12570130039217123E+02, + E -7.65252468141181642E+02, 1.05999045252799988E+03/ + DATA C(25), C(26), C(27), C(28), C(29), C(30), C(31), C(32), + 1 C(33), C(34), C(35), C(36), C(37), C(38), C(39), C(40), + 2 C(41), C(42), C(43), C(44), C(45), C(46), C(47), C(48)/ + 3 -6.99579627376132541E+02, 2.18190511744211590E+02, + 4 -2.64914304869515555E+01, 5.72501420974731445E-01, + 5 -1.91945766231840700E+03, 8.06172218173730938E+03, + 6 -1.35865500064341374E+04, 1.16553933368645332E+04, + 7 -5.30564697861340311E+03, 1.20090291321635246E+03, + 8 -1.08090919788394656E+02, 1.72772750258445740E+00, + 9 2.02042913309661486E+04, -9.69805983886375135E+04, + A 1.92547001232531532E+05, -2.03400177280415534E+05, + B 1.22200464983017460E+05, -4.11926549688975513E+04, + C 7.10951430248936372E+03, -4.93915304773088012E+02, + D 6.07404200127348304E+00, 
-2.42919187900551333E+05, + E 1.31176361466297720E+06, -2.99801591853810675E+06/ + DATA C(49), C(50), C(51), C(52), C(53), C(54), C(55), C(56), + 1 C(57), C(58), C(59), C(60), C(61), C(62), C(63), C(64), + 2 C(65), C(66), C(67), C(68), C(69), C(70), C(71), C(72)/ + 3 3.76327129765640400E+06, -2.81356322658653411E+06, + 4 1.26836527332162478E+06, -3.31645172484563578E+05, + 5 4.52187689813627263E+04, -2.49983048181120962E+03, + 6 2.43805296995560639E+01, 3.28446985307203782E+06, + 7 -1.97068191184322269E+07, 5.09526024926646422E+07, + 8 -7.41051482115326577E+07, 6.63445122747290267E+07, + 9 -3.75671766607633513E+07, 1.32887671664218183E+07, + A -2.78561812808645469E+06, 3.08186404612662398E+05, + B -1.38860897537170405E+04, 1.10017140269246738E+02, + C -4.93292536645099620E+07, 3.25573074185765749E+08, + D -9.39462359681578403E+08, 1.55359689957058006E+09, + E -1.62108055210833708E+09, 1.10684281682301447E+09/ + DATA C(73), C(74), C(75), C(76), C(77), C(78), C(79), C(80), + 1 C(81), C(82), C(83), C(84), C(85), C(86), C(87), C(88), + 2 C(89), C(90), C(91), C(92), C(93), C(94), C(95), C(96)/ + 3 -4.95889784275030309E+08, 1.42062907797533095E+08, + 4 -2.44740627257387285E+07, 2.24376817792244943E+06, + 5 -8.40054336030240853E+04, 5.51335896122020586E+02, + 6 8.14789096118312115E+08, -5.86648149205184723E+09, + 7 1.86882075092958249E+10, -3.46320433881587779E+10, + 8 4.12801855797539740E+10, -3.30265997498007231E+10, + 9 1.79542137311556001E+10, -6.56329379261928433E+09, + A 1.55927986487925751E+09, -2.25105661889415278E+08, + B 1.73951075539781645E+07, -5.49842327572288687E+05, + C 3.03809051092238427E+03, -1.46792612476956167E+10, + D 1.14498237732025810E+11, -3.99096175224466498E+11, + E 8.19218669548577329E+11, -1.09837515608122331E+12/ + DATA C(97), C(98), C(99), C(100), C(101), C(102), C(103), C(104), + 1 C(105)/ + 2 1.00815810686538209E+12, -6.45364869245376503E+11, + 3 2.87900649906150589E+11, -8.78670721780232657E+10, + 4 1.76347306068349694E+10, 
-2.16716498322379509E+09, + 5 1.43157876718888981E+08, -3.87183344257261262E+06, + 6 1.82577554742931747E+04/ + DATA ALFA(1), ALFA(2), ALFA(3), ALFA(4), ALFA(5), ALFA(6), + 1 ALFA(7), ALFA(8), ALFA(9), ALFA(10), ALFA(11), ALFA(12), + 2 ALFA(13), ALFA(14), ALFA(15), ALFA(16), ALFA(17), ALFA(18), + 3 ALFA(19), ALFA(20), ALFA(21), ALFA(22)/ + 4 -4.44444444444444444E-03, -9.22077922077922078E-04, + 5 -8.84892884892884893E-05, 1.65927687832449737E-04, + 6 2.46691372741792910E-04, 2.65995589346254780E-04, + 7 2.61824297061500945E-04, 2.48730437344655609E-04, + 8 2.32721040083232098E-04, 2.16362485712365082E-04, + 9 2.00738858762752355E-04, 1.86267636637545172E-04, + A 1.73060775917876493E-04, 1.61091705929015752E-04, + B 1.50274774160908134E-04, 1.40503497391269794E-04, + C 1.31668816545922806E-04, 1.23667445598253261E-04, + D 1.16405271474737902E-04, 1.09798298372713369E-04, + E 1.03772410422992823E-04, 9.82626078369363448E-05/ + DATA ALFA(23), ALFA(24), ALFA(25), ALFA(26), ALFA(27), ALFA(28), + 1 ALFA(29), ALFA(30), ALFA(31), ALFA(32), ALFA(33), ALFA(34), + 2 ALFA(35), ALFA(36), ALFA(37), ALFA(38), ALFA(39), ALFA(40), + 3 ALFA(41), ALFA(42), ALFA(43), ALFA(44)/ + 4 9.32120517249503256E-05, 8.85710852478711718E-05, + 5 8.42963105715700223E-05, 8.03497548407791151E-05, + 6 7.66981345359207388E-05, 7.33122157481777809E-05, + 7 7.01662625163141333E-05, 6.72375633790160292E-05, + 8 6.93735541354588974E-04, 2.32241745182921654E-04, + 9 -1.41986273556691197E-05, -1.16444931672048640E-04, + A -1.50803558053048762E-04, -1.55121924918096223E-04, + B -1.46809756646465549E-04, -1.33815503867491367E-04, + C -1.19744975684254051E-04, -1.06184319207974020E-04, + D -9.37699549891194492E-05, -8.26923045588193274E-05, + E -7.29374348155221211E-05, -6.44042357721016283E-05/ + DATA ALFA(45), ALFA(46), ALFA(47), ALFA(48), ALFA(49), ALFA(50), + 1 ALFA(51), ALFA(52), ALFA(53), ALFA(54), ALFA(55), ALFA(56), + 2 ALFA(57), ALFA(58), ALFA(59), ALFA(60), ALFA(61), ALFA(62), + 3 ALFA(63), 
ALFA(64), ALFA(65), ALFA(66)/ + 4 -5.69611566009369048E-05, -5.04731044303561628E-05, + 5 -4.48134868008882786E-05, -3.98688727717598864E-05, + 6 -3.55400532972042498E-05, -3.17414256609022480E-05, + 7 -2.83996793904174811E-05, -2.54522720634870566E-05, + 8 -2.28459297164724555E-05, -2.05352753106480604E-05, + 9 -1.84816217627666085E-05, -1.66519330021393806E-05, + A -1.50179412980119482E-05, -1.35554031379040526E-05, + B -1.22434746473858131E-05, -1.10641884811308169E-05, + C -3.54211971457743841E-04, -1.56161263945159416E-04, + D 3.04465503594936410E-05, 1.30198655773242693E-04, + E 1.67471106699712269E-04, 1.70222587683592569E-04/ + DATA ALFA(67), ALFA(68), ALFA(69), ALFA(70), ALFA(71), ALFA(72), + 1 ALFA(73), ALFA(74), ALFA(75), ALFA(76), ALFA(77), ALFA(78), + 2 ALFA(79), ALFA(80), ALFA(81), ALFA(82), ALFA(83), ALFA(84), + 3 ALFA(85), ALFA(86), ALFA(87), ALFA(88)/ + 4 1.56501427608594704E-04, 1.36339170977445120E-04, + 5 1.14886692029825128E-04, 9.45869093034688111E-05, + 6 7.64498419250898258E-05, 6.07570334965197354E-05, + 7 4.74394299290508799E-05, 3.62757512005344297E-05, + 8 2.69939714979224901E-05, 1.93210938247939253E-05, + 9 1.30056674793963203E-05, 7.82620866744496661E-06, + A 3.59257485819351583E-06, 1.44040049814251817E-07, + B -2.65396769697939116E-06, -4.91346867098485910E-06, + C -6.72739296091248287E-06, -8.17269379678657923E-06, + D -9.31304715093561232E-06, -1.02011418798016441E-05, + E -1.08805962510592880E-05, -1.13875481509603555E-05/ + DATA ALFA(89), ALFA(90), ALFA(91), ALFA(92), ALFA(93), ALFA(94), + 1 ALFA(95), ALFA(96), ALFA(97), ALFA(98), ALFA(99), ALFA(100), + 2 ALFA(101), ALFA(102), ALFA(103), ALFA(104), ALFA(105), + 3 ALFA(106), ALFA(107), ALFA(108), ALFA(109), ALFA(110)/ + 4 -1.17519675674556414E-05, -1.19987364870944141E-05, + 5 3.78194199201772914E-04, 2.02471952761816167E-04, + 6 -6.37938506318862408E-05, -2.38598230603005903E-04, + 7 -3.10916256027361568E-04, -3.13680115247576316E-04, + 8 -2.78950273791323387E-04, 
-2.28564082619141374E-04, + 9 -1.75245280340846749E-04, -1.25544063060690348E-04, + A -8.22982872820208365E-05, -4.62860730588116458E-05, + B -1.72334302366962267E-05, 5.60690482304602267E-06, + C 2.31395443148286800E-05, 3.62642745856793957E-05, + D 4.58006124490188752E-05, 5.24595294959114050E-05, + E 5.68396208545815266E-05, 5.94349820393104052E-05/ + DATA ALFA(111), ALFA(112), ALFA(113), ALFA(114), ALFA(115), + 1 ALFA(116), ALFA(117), ALFA(118), ALFA(119), ALFA(120), + 2 ALFA(121), ALFA(122), ALFA(123), ALFA(124), ALFA(125), + 3 ALFA(126), ALFA(127), ALFA(128), ALFA(129), ALFA(130)/ + 4 6.06478527578421742E-05, 6.08023907788436497E-05, + 5 6.01577894539460388E-05, 5.89199657344698500E-05, + 6 5.72515823777593053E-05, 5.52804375585852577E-05, + 7 5.31063773802880170E-05, 5.08069302012325706E-05, + 8 4.84418647620094842E-05, 4.60568581607475370E-05, + 9 -6.91141397288294174E-04, -4.29976633058871912E-04, + A 1.83067735980039018E-04, 6.60088147542014144E-04, + B 8.75964969951185931E-04, 8.77335235958235514E-04, + C 7.49369585378990637E-04, 5.63832329756980918E-04, + D 3.68059319971443156E-04, 1.88464535514455599E-04/ + DATA ALFA(131), ALFA(132), ALFA(133), ALFA(134), ALFA(135), + 1 ALFA(136), ALFA(137), ALFA(138), ALFA(139), ALFA(140), + 2 ALFA(141), ALFA(142), ALFA(143), ALFA(144), ALFA(145), + 3 ALFA(146), ALFA(147), ALFA(148), ALFA(149), ALFA(150)/ + 4 3.70663057664904149E-05, -8.28520220232137023E-05, + 5 -1.72751952869172998E-04, -2.36314873605872983E-04, + 6 -2.77966150694906658E-04, -3.02079514155456919E-04, + 7 -3.12594712643820127E-04, -3.12872558758067163E-04, + 8 -3.05678038466324377E-04, -2.93226470614557331E-04, + 9 -2.77255655582934777E-04, -2.59103928467031709E-04, + A -2.39784014396480342E-04, -2.20048260045422848E-04, + B -2.00443911094971498E-04, -1.81358692210970687E-04, + C -1.63057674478657464E-04, -1.45712672175205844E-04, + D -1.29425421983924587E-04, -1.14245691942445952E-04/ + DATA ALFA(151), ALFA(152), ALFA(153), ALFA(154), ALFA(155), + 1 
ALFA(156), ALFA(157), ALFA(158), ALFA(159), ALFA(160), + 2 ALFA(161), ALFA(162), ALFA(163), ALFA(164), ALFA(165), + 3 ALFA(166), ALFA(167), ALFA(168), ALFA(169), ALFA(170)/ + 4 1.92821964248775885E-03, 1.35592576302022234E-03, + 5 -7.17858090421302995E-04, -2.58084802575270346E-03, + 6 -3.49271130826168475E-03, -3.46986299340960628E-03, + 7 -2.82285233351310182E-03, -1.88103076404891354E-03, + 8 -8.89531718383947600E-04, 3.87912102631035228E-06, + 9 7.28688540119691412E-04, 1.26566373053457758E-03, + A 1.62518158372674427E-03, 1.83203153216373172E-03, + B 1.91588388990527909E-03, 1.90588846755546138E-03, + C 1.82798982421825727E-03, 1.70389506421121530E-03, + D 1.55097127171097686E-03, 1.38261421852276159E-03/ + DATA ALFA(171), ALFA(172), ALFA(173), ALFA(174), ALFA(175), + 1 ALFA(176), ALFA(177), ALFA(178), ALFA(179), ALFA(180)/ + 2 1.20881424230064774E-03, 1.03676532638344962E-03, + 3 8.71437918068619115E-04, 7.16080155297701002E-04, + 4 5.72637002558129372E-04, 4.42089819465802277E-04, + 5 3.24724948503090564E-04, 2.20342042730246599E-04, + 6 1.28412898401353882E-04, 4.82005924552095464E-05/ + DATA BETA(1), BETA(2), BETA(3), BETA(4), BETA(5), BETA(6), + 1 BETA(7), BETA(8), BETA(9), BETA(10), BETA(11), BETA(12), + 2 BETA(13), BETA(14), BETA(15), BETA(16), BETA(17), BETA(18), + 3 BETA(19), BETA(20), BETA(21), BETA(22)/ + 4 1.79988721413553309E-02, 5.59964911064388073E-03, + 5 2.88501402231132779E-03, 1.80096606761053941E-03, + 6 1.24753110589199202E-03, 9.22878876572938311E-04, + 7 7.14430421727287357E-04, 5.71787281789704872E-04, + 8 4.69431007606481533E-04, 3.93232835462916638E-04, + 9 3.34818889318297664E-04, 2.88952148495751517E-04, + A 2.52211615549573284E-04, 2.22280580798883327E-04, + B 1.97541838033062524E-04, 1.76836855019718004E-04, + C 1.59316899661821081E-04, 1.44347930197333986E-04, + D 1.31448068119965379E-04, 1.20245444949302884E-04, + E 1.10449144504599392E-04, 1.01828770740567258E-04/ + DATA BETA(23), BETA(24), BETA(25), BETA(26), BETA(27), 
BETA(28), + 1 BETA(29), BETA(30), BETA(31), BETA(32), BETA(33), BETA(34), + 2 BETA(35), BETA(36), BETA(37), BETA(38), BETA(39), BETA(40), + 3 BETA(41), BETA(42), BETA(43), BETA(44)/ + 4 9.41998224204237509E-05, 8.74130545753834437E-05, + 5 8.13466262162801467E-05, 7.59002269646219339E-05, + 6 7.09906300634153481E-05, 6.65482874842468183E-05, + 7 6.25146958969275078E-05, 5.88403394426251749E-05, + 8 -1.49282953213429172E-03, -8.78204709546389328E-04, + 9 -5.02916549572034614E-04, -2.94822138512746025E-04, + A -1.75463996970782828E-04, -1.04008550460816434E-04, + B -5.96141953046457895E-05, -3.12038929076098340E-05, + C -1.26089735980230047E-05, -2.42892608575730389E-07, + D 8.05996165414273571E-06, 1.36507009262147391E-05, + E 1.73964125472926261E-05, 1.98672978842133780E-05/ + DATA BETA(45), BETA(46), BETA(47), BETA(48), BETA(49), BETA(50), + 1 BETA(51), BETA(52), BETA(53), BETA(54), BETA(55), BETA(56), + 2 BETA(57), BETA(58), BETA(59), BETA(60), BETA(61), BETA(62), + 3 BETA(63), BETA(64), BETA(65), BETA(66)/ + 4 2.14463263790822639E-05, 2.23954659232456514E-05, + 5 2.28967783814712629E-05, 2.30785389811177817E-05, + 6 2.30321976080909144E-05, 2.28236073720348722E-05, + 7 2.25005881105292418E-05, 2.20981015361991429E-05, + 8 2.16418427448103905E-05, 2.11507649256220843E-05, + 9 2.06388749782170737E-05, 2.01165241997081666E-05, + A 1.95913450141179244E-05, 1.90689367910436740E-05, + B 1.85533719641636667E-05, 1.80475722259674218E-05, + C 5.52213076721292790E-04, 4.47932581552384646E-04, + D 2.79520653992020589E-04, 1.52468156198446602E-04, + E 6.93271105657043598E-05, 1.76258683069991397E-05/ + DATA BETA(67), BETA(68), BETA(69), BETA(70), BETA(71), BETA(72), + 1 BETA(73), BETA(74), BETA(75), BETA(76), BETA(77), BETA(78), + 2 BETA(79), BETA(80), BETA(81), BETA(82), BETA(83), BETA(84), + 3 BETA(85), BETA(86), BETA(87), BETA(88)/ + 4 -1.35744996343269136E-05, -3.17972413350427135E-05, + 5 -4.18861861696693365E-05, -4.69004889379141029E-05, + 6 -4.87665447413787352E-05, 
-4.87010031186735069E-05, + 7 -4.74755620890086638E-05, -4.55813058138628452E-05, + 8 -4.33309644511266036E-05, -4.09230193157750364E-05, + 9 -3.84822638603221274E-05, -3.60857167535410501E-05, + A -3.37793306123367417E-05, -3.15888560772109621E-05, + B -2.95269561750807315E-05, -2.75978914828335759E-05, + C -2.58006174666883713E-05, -2.41308356761280200E-05, + D -2.25823509518346033E-05, -2.11479656768912971E-05, + E -1.98200638885294927E-05, -1.85909870801065077E-05/ + DATA BETA(89), BETA(90), BETA(91), BETA(92), BETA(93), BETA(94), + 1 BETA(95), BETA(96), BETA(97), BETA(98), BETA(99), BETA(100), + 2 BETA(101), BETA(102), BETA(103), BETA(104), BETA(105), + 3 BETA(106), BETA(107), BETA(108), BETA(109), BETA(110)/ + 4 -1.74532699844210224E-05, -1.63997823854497997E-05, + 5 -4.74617796559959808E-04, -4.77864567147321487E-04, + 6 -3.20390228067037603E-04, -1.61105016119962282E-04, + 7 -4.25778101285435204E-05, 3.44571294294967503E-05, + 8 7.97092684075674924E-05, 1.03138236708272200E-04, + 9 1.12466775262204158E-04, 1.13103642108481389E-04, + A 1.08651634848774268E-04, 1.01437951597661973E-04, + B 9.29298396593363896E-05, 8.40293133016089978E-05, + C 7.52727991349134062E-05, 6.69632521975730872E-05, + D 5.92564547323194704E-05, 5.22169308826975567E-05, + E 4.58539485165360646E-05, 4.01445513891486808E-05/ + DATA BETA(111), BETA(112), BETA(113), BETA(114), BETA(115), + 1 BETA(116), BETA(117), BETA(118), BETA(119), BETA(120), + 2 BETA(121), BETA(122), BETA(123), BETA(124), BETA(125), + 3 BETA(126), BETA(127), BETA(128), BETA(129), BETA(130)/ + 4 3.50481730031328081E-05, 3.05157995034346659E-05, + 5 2.64956119950516039E-05, 2.29363633690998152E-05, + 6 1.97893056664021636E-05, 1.70091984636412623E-05, + 7 1.45547428261524004E-05, 1.23886640995878413E-05, + 8 1.04775876076583236E-05, 8.79179954978479373E-06, + 9 7.36465810572578444E-04, 8.72790805146193976E-04, + A 6.22614862573135066E-04, 2.85998154194304147E-04, + B 3.84737672879366102E-06, -1.87906003636971558E-04, + 
C -2.97603646594554535E-04, -3.45998126832656348E-04, + D -3.53382470916037712E-04, -3.35715635775048757E-04/ + DATA BETA(131), BETA(132), BETA(133), BETA(134), BETA(135), + 1 BETA(136), BETA(137), BETA(138), BETA(139), BETA(140), + 2 BETA(141), BETA(142), BETA(143), BETA(144), BETA(145), + 3 BETA(146), BETA(147), BETA(148), BETA(149), BETA(150)/ + 4 -3.04321124789039809E-04, -2.66722723047612821E-04, + 5 -2.27654214122819527E-04, -1.89922611854562356E-04, + 6 -1.55058918599093870E-04, -1.23778240761873630E-04, + 7 -9.62926147717644187E-05, -7.25178327714425337E-05, + 8 -5.22070028895633801E-05, -3.50347750511900522E-05, + 9 -2.06489761035551757E-05, -8.70106096849767054E-06, + A 1.13698686675100290E-06, 9.16426474122778849E-06, + B 1.56477785428872620E-05, 2.08223629482466847E-05, + C 2.48923381004595156E-05, 2.80340509574146325E-05, + D 3.03987774629861915E-05, 3.21156731406700616E-05/ + DATA BETA(151), BETA(152), BETA(153), BETA(154), BETA(155), + 1 BETA(156), BETA(157), BETA(158), BETA(159), BETA(160), + 2 BETA(161), BETA(162), BETA(163), BETA(164), BETA(165), + 3 BETA(166), BETA(167), BETA(168), BETA(169), BETA(170)/ + 4 -1.80182191963885708E-03, -2.43402962938042533E-03, + 5 -1.83422663549856802E-03, -7.62204596354009765E-04, + 6 2.39079475256927218E-04, 9.49266117176881141E-04, + 7 1.34467449701540359E-03, 1.48457495259449178E-03, + 8 1.44732339830617591E-03, 1.30268261285657186E-03, + 9 1.10351597375642682E-03, 8.86047440419791759E-04, + A 6.73073208165665473E-04, 4.77603872856582378E-04, + B 3.05991926358789362E-04, 1.60315694594721630E-04, + C 4.00749555270613286E-05, -5.66607461635251611E-05, + D -1.32506186772982638E-04, -1.90296187989614057E-04/ + DATA BETA(171), BETA(172), BETA(173), BETA(174), BETA(175), + 1 BETA(176), BETA(177), BETA(178), BETA(179), BETA(180), + 2 BETA(181), BETA(182), BETA(183), BETA(184), BETA(185), + 3 BETA(186), BETA(187), BETA(188), BETA(189), BETA(190)/ + 4 -2.32811450376937408E-04, -2.62628811464668841E-04, + 5 
-2.82050469867598672E-04, -2.93081563192861167E-04, + 6 -2.97435962176316616E-04, -2.96557334239348078E-04, + 7 -2.91647363312090861E-04, -2.83696203837734166E-04, + 8 -2.73512317095673346E-04, -2.61750155806768580E-04, + 9 6.38585891212050914E-03, 9.62374215806377941E-03, + A 7.61878061207001043E-03, 2.83219055545628054E-03, + B -2.09841352012720090E-03, -5.73826764216626498E-03, + C -7.70804244495414620E-03, -8.21011692264844401E-03, + D -7.65824520346905413E-03, -6.47209729391045177E-03/ + DATA BETA(191), BETA(192), BETA(193), BETA(194), BETA(195), + 1 BETA(196), BETA(197), BETA(198), BETA(199), BETA(200), + 2 BETA(201), BETA(202), BETA(203), BETA(204), BETA(205), + 3 BETA(206), BETA(207), BETA(208), BETA(209), BETA(210)/ + 4 -4.99132412004966473E-03, -3.45612289713133280E-03, + 5 -2.01785580014170775E-03, -7.59430686781961401E-04, + 6 2.84173631523859138E-04, 1.10891667586337403E-03, + 7 1.72901493872728771E-03, 2.16812590802684701E-03, + 8 2.45357710494539735E-03, 2.61281821058334862E-03, + 9 2.67141039656276912E-03, 2.65203073395980430E-03, + A 2.57411652877287315E-03, 2.45389126236094427E-03, + B 2.30460058071795494E-03, 2.13684837686712662E-03, + C 1.95896528478870911E-03, 1.77737008679454412E-03, + D 1.59690280765839059E-03, 1.42111975664438546E-03/ + DATA GAMA(1), GAMA(2), GAMA(3), GAMA(4), GAMA(5), GAMA(6), + 1 GAMA(7), GAMA(8), GAMA(9), GAMA(10), GAMA(11), GAMA(12), + 2 GAMA(13), GAMA(14), GAMA(15), GAMA(16), GAMA(17), GAMA(18), + 3 GAMA(19), GAMA(20), GAMA(21), GAMA(22)/ + 4 6.29960524947436582E-01, 2.51984209978974633E-01, + 5 1.54790300415655846E-01, 1.10713062416159013E-01, + 6 8.57309395527394825E-02, 6.97161316958684292E-02, + 7 5.86085671893713576E-02, 5.04698873536310685E-02, + 8 4.42600580689154809E-02, 3.93720661543509966E-02, + 9 3.54283195924455368E-02, 3.21818857502098231E-02, + A 2.94646240791157679E-02, 2.71581677112934479E-02, + B 2.51768272973861779E-02, 2.34570755306078891E-02, + C 2.19508390134907203E-02, 2.06210828235646240E-02, + D 
1.94388240897880846E-02, 1.83810633800683158E-02, + E 1.74293213231963172E-02, 1.65685837786612353E-02/ + DATA GAMA(23), GAMA(24), GAMA(25), GAMA(26), GAMA(27), GAMA(28), + 1 GAMA(29), GAMA(30)/ + 2 1.57865285987918445E-02, 1.50729501494095594E-02, + 3 1.44193250839954639E-02, 1.38184805735341786E-02, + 4 1.32643378994276568E-02, 1.27517121970498651E-02, + 5 1.22761545318762767E-02, 1.18338262398482403E-02/ + DATA EX1, EX2, HPI, PI, THPI / + 1 3.33333333333333333E-01, 6.66666666666666667E-01, + 2 1.57079632679489662E+00, 3.14159265358979324E+00, + 3 4.71238898038468986E+00/ + DATA CZERO, CONE / (0.0E0,0.0E0), (1.0E0,0.0E0) / +C + RFNU = 1.0E0/FNU +C ZB = Z*CMPLX(RFNU,0.0E0) +C----------------------------------------------------------------------- +C OVERFLOW TEST (Z/FNU TOO SMALL) +C----------------------------------------------------------------------- + TSTR = REAL(Z) + TSTI = AIMAG(Z) + TEST = R1MACH(1)*1.0E+3 + AC = FNU*TEST + IF (ABS(TSTR).GT.AC .OR. ABS(TSTI).GT.AC) GO TO 15 + AC = 2.0E0*ABS(ALOG(TEST))+FNU + ZETA1 = CMPLX(AC,0.0E0) + ZETA2 = CMPLX(FNU,0.0E0) + PHI=CONE + ARG=CONE + RETURN + 15 CONTINUE + ZB = Z*CMPLX(RFNU,0.0E0) + RFNU2 = RFNU*RFNU +C----------------------------------------------------------------------- +C COMPUTE IN THE FOURTH QUADRANT +C----------------------------------------------------------------------- + FN13 = FNU**EX1 + FN23 = FN13*FN13 + RFN13 = CMPLX(1.0E0/FN13,0.0E0) + W2 = CONE - ZB*ZB + AW2 = CABS(W2) + IF (AW2.GT.0.25E0) GO TO 130 +C----------------------------------------------------------------------- +C POWER SERIES FOR CABS(W2).LE.0.25E0 +C----------------------------------------------------------------------- + K = 1 + P(1) = CONE + SUMA = CMPLX(GAMA(1),0.0E0) + AP(1) = 1.0E0 + IF (AW2.LT.TOL) GO TO 20 + DO 10 K=2,30 + P(K) = P(K-1)*W2 + SUMA = SUMA + P(K)*CMPLX(GAMA(K),0.0E0) + AP(K) = AP(K-1)*AW2 + IF (AP(K).LT.TOL) GO TO 20 + 10 CONTINUE + K = 30 + 20 CONTINUE + KMAX = K + ZETA = W2*SUMA + ARG = ZETA*CMPLX(FN23,0.0E0) 
+ ZA = CSQRT(SUMA) + ZETA2 = CSQRT(W2)*CMPLX(FNU,0.0E0) + ZETA1 = ZETA2*(CONE+ZETA*ZA*CMPLX(EX2,0.0E0)) + ZA = ZA + ZA + PHI = CSQRT(ZA)*RFN13 + IF (IPMTR.EQ.1) GO TO 120 +C----------------------------------------------------------------------- +C SUM SERIES FOR ASUM AND BSUM +C----------------------------------------------------------------------- + SUMB = CZERO + DO 30 K=1,KMAX + SUMB = SUMB + P(K)*CMPLX(BETA(K),0.0E0) + 30 CONTINUE + ASUM = CZERO + BSUM = SUMB + L1 = 0 + L2 = 30 + BTOL = TOL*CABS(BSUM) + ATOL = TOL + PP = 1.0E0 + IAS = 0 + IBS = 0 + IF (RFNU2.LT.TOL) GO TO 110 + DO 100 IS=2,7 + ATOL = ATOL/RFNU2 + PP = PP*RFNU2 + IF (IAS.EQ.1) GO TO 60 + SUMA = CZERO + DO 40 K=1,KMAX + M = L1 + K + SUMA = SUMA + P(K)*CMPLX(ALFA(M),0.0E0) + IF (AP(K).LT.ATOL) GO TO 50 + 40 CONTINUE + 50 CONTINUE + ASUM = ASUM + SUMA*CMPLX(PP,0.0E0) + IF (PP.LT.TOL) IAS = 1 + 60 CONTINUE + IF (IBS.EQ.1) GO TO 90 + SUMB = CZERO + DO 70 K=1,KMAX + M = L2 + K + SUMB = SUMB + P(K)*CMPLX(BETA(M),0.0E0) + IF (AP(K).LT.ATOL) GO TO 80 + 70 CONTINUE + 80 CONTINUE + BSUM = BSUM + SUMB*CMPLX(PP,0.0E0) + IF (PP.LT.BTOL) IBS = 1 + 90 CONTINUE + IF (IAS.EQ.1 .AND. 
IBS.EQ.1) GO TO 110 + L1 = L1 + 30 + L2 = L2 + 30 + 100 CONTINUE + 110 CONTINUE + ASUM = ASUM + CONE + PP = RFNU*REAL(RFN13) + BSUM = BSUM*CMPLX(PP,0.0E0) + 120 CONTINUE + RETURN +C----------------------------------------------------------------------- +C CABS(W2).GT.0.25E0 +C----------------------------------------------------------------------- + 130 CONTINUE + W = CSQRT(W2) + WR = REAL(W) + WI = AIMAG(W) + IF (WR.LT.0.0E0) WR = 0.0E0 + IF (WI.LT.0.0E0) WI = 0.0E0 + W = CMPLX(WR,WI) + ZA = (CONE+W)/ZB + ZC = CLOG(ZA) + ZCR = REAL(ZC) + ZCI = AIMAG(ZC) + IF (ZCI.LT.0.0E0) ZCI = 0.0E0 + IF (ZCI.GT.HPI) ZCI = HPI + IF (ZCR.LT.0.0E0) ZCR = 0.0E0 + ZC = CMPLX(ZCR,ZCI) + ZTH = (ZC-W)*CMPLX(1.5E0,0.0E0) + CFNU = CMPLX(FNU,0.0E0) + ZETA1 = ZC*CFNU + ZETA2 = W*CFNU + AZTH = CABS(ZTH) + ZTHR = REAL(ZTH) + ZTHI = AIMAG(ZTH) + ANG = THPI + IF (ZTHR.GE.0.0E0 .AND. ZTHI.LT.0.0E0) GO TO 140 + ANG = HPI + IF (ZTHR.EQ.0.0E0) GO TO 140 + ANG = ATAN(ZTHI/ZTHR) + IF (ZTHR.LT.0.0E0) ANG = ANG + PI + 140 CONTINUE + PP = AZTH**EX2 + ANG = ANG*EX2 + ZETAR = PP*COS(ANG) + ZETAI = PP*SIN(ANG) + IF (ZETAI.LT.0.0E0) ZETAI = 0.0E0 + ZETA = CMPLX(ZETAR,ZETAI) + ARG = ZETA*CMPLX(FN23,0.0E0) + RTZTA = ZTH/ZETA + ZA = RTZTA/W + PHI = CSQRT(ZA+ZA)*RFN13 + IF (IPMTR.EQ.1) GO TO 120 + TFN = CMPLX(RFNU,0.0E0)/W + RZTH = CMPLX(RFNU,0.0E0)/ZTH + ZC = RZTH*CMPLX(AR(2),0.0E0) + T2 = CONE/W2 + UP(2) = (T2*CMPLX(C(2),0.0E0)+CMPLX(C(3),0.0E0))*TFN + BSUM = UP(2) + ZC + ASUM = CZERO + IF (RFNU.LT.TOL) GO TO 220 + PRZTH = RZTH + PTFN = TFN + UP(1) = CONE + PP = 1.0E0 + BSUMR = REAL(BSUM) + BSUMI = AIMAG(BSUM) + BTOL = TOL*(ABS(BSUMR)+ABS(BSUMI)) + KS = 0 + KP1 = 2 + L = 3 + IAS = 0 + IBS = 0 + DO 210 LR=2,12,2 + LRP1 = LR + 1 +C----------------------------------------------------------------------- +C COMPUTE TWO ADDITIONAL CR, DR, AND UP FOR TWO MORE TERMS IN +C NEXT SUMA AND SUMB +C----------------------------------------------------------------------- + DO 160 K=LR,LRP1 + KS = KS + 1 + KP1 = KP1 + 1 + L = 
L + 1 + ZA = CMPLX(C(L),0.0E0) + DO 150 J=2,KP1 + L = L + 1 + ZA = ZA*T2 + CMPLX(C(L),0.0E0) + 150 CONTINUE + PTFN = PTFN*TFN + UP(KP1) = PTFN*ZA + CR(KS) = PRZTH*CMPLX(BR(KS+1),0.0E0) + PRZTH = PRZTH*RZTH + DR(KS) = PRZTH*CMPLX(AR(KS+2),0.0E0) + 160 CONTINUE + PP = PP*RFNU2 + IF (IAS.EQ.1) GO TO 180 + SUMA = UP(LRP1) + JU = LRP1 + DO 170 JR=1,LR + JU = JU - 1 + SUMA = SUMA + CR(JR)*UP(JU) + 170 CONTINUE + ASUM = ASUM + SUMA + ASUMR = REAL(ASUM) + ASUMI = AIMAG(ASUM) + TEST = ABS(ASUMR) + ABS(ASUMI) + IF (PP.LT.TOL .AND. TEST.LT.TOL) IAS = 1 + 180 CONTINUE + IF (IBS.EQ.1) GO TO 200 + SUMB = UP(LR+2) + UP(LRP1)*ZC + JU = LRP1 + DO 190 JR=1,LR + JU = JU - 1 + SUMB = SUMB + DR(JR)*UP(JU) + 190 CONTINUE + BSUM = BSUM + SUMB + BSUMR = REAL(BSUM) + BSUMI = AIMAG(BSUM) + TEST = ABS(BSUMR) + ABS(BSUMI) + IF (PP.LT.BTOL .AND. TEST.LT.TOL) IBS = 1 + 200 CONTINUE + IF (IAS.EQ.1 .AND. IBS.EQ.1) GO TO 220 + 210 CONTINUE + 220 CONTINUE + ASUM = ASUM + CONE + BSUM = -BSUM*RFN13/RTZTA + GO TO 120 + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cuni1.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cuni1.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,168 @@ + SUBROUTINE CUNI1(Z, FNU, KODE, N, Y, NZ, NLAST, FNUL, TOL, ELIM, + * ALIM) +C***BEGIN PROLOGUE CUNI1 +C***REFER TO CBESI,CBESK +C +C CUNI1 COMPUTES I(FNU,Z) BY MEANS OF THE UNIFORM ASYMPTOTIC +C EXPANSION FOR I(FNU,Z) IN -PI/3.LE.ARG Z.LE.PI/3. +C +C FNUL IS THE SMALLEST ORDER PERMITTED FOR THE ASYMPTOTIC +C EXPANSION. NLAST=0 MEANS ALL OF THE Y VALUES WERE SET. +C NLAST.NE.0 IS THE NUMBER LEFT TO BE COMPUTED BY ANOTHER +C FORMULA FOR ORDERS FNU TO FNU+NLAST-1 BECAUSE FNU+NLAST-1.LT.FNUL. 
+C Y(I)=CZERO FOR I=NLAST+1,N +C +C***ROUTINES CALLED CUCHK,CUNIK,CUOIK,R1MACH +C***END PROLOGUE CUNI1 + COMPLEX CFN, CONE, CRSC, CSCL, CSR, CSS, CWRK, CZERO, C1, C2, + * PHI, RZ, SUM, S1, S2, Y, Z, ZETA1, ZETA2, CY + REAL ALIM, APHI, ASCLE, BRY, C2I, C2M, C2R, ELIM, FN, FNU, FNUL, + * RS1, TOL, YY, R1MACH + INTEGER I, IFLAG, INIT, K, KODE, M, N, ND, NLAST, NN, NUF, NW, NZ + DIMENSION BRY(3), Y(N), CWRK(16), CSS(3), CSR(3), CY(2) + DATA CZERO, CONE / (0.0E0,0.0E0), (1.0E0,0.0E0) / +C + NZ = 0 + ND = N + NLAST = 0 +C----------------------------------------------------------------------- +C COMPUTED VALUES WITH EXPONENTS BETWEEN ALIM AND ELIM IN MAG- +C NITUDE ARE SCALED TO KEEP INTERMEDIATE ARITHMETIC ON SCALE, +C EXP(ALIM)=EXP(ELIM)*TOL +C----------------------------------------------------------------------- + CSCL = CMPLX(1.0E0/TOL,0.0E0) + CRSC = CMPLX(TOL,0.0E0) + CSS(1) = CSCL + CSS(2) = CONE + CSS(3) = CRSC + CSR(1) = CRSC + CSR(2) = CONE + CSR(3) = CSCL + BRY(1) = 1.0E+3*R1MACH(1)/TOL +C----------------------------------------------------------------------- +C CHECK FOR UNDERFLOW AND OVERFLOW ON FIRST MEMBER +C----------------------------------------------------------------------- + FN = AMAX1(FNU,1.0E0) + INIT = 0 + CALL CUNIK(Z, FN, 1, 1, TOL, INIT, PHI, ZETA1, ZETA2, SUM, CWRK) + IF (KODE.EQ.1) GO TO 10 + CFN = CMPLX(FN,0.0E0) + S1 = -ZETA1 + CFN*(CFN/(Z+ZETA2)) + GO TO 20 + 10 CONTINUE + S1 = -ZETA1 + ZETA2 + 20 CONTINUE + RS1 = REAL(S1) + IF (ABS(RS1).GT.ELIM) GO TO 130 + 30 CONTINUE + NN = MIN0(2,ND) + DO 80 I=1,NN + FN = FNU + FLOAT(ND-I) + INIT = 0 + CALL CUNIK(Z, FN, 1, 0, TOL, INIT, PHI, ZETA1, ZETA2, SUM, CWRK) + IF (KODE.EQ.1) GO TO 40 + CFN = CMPLX(FN,0.0E0) + YY = AIMAG(Z) + S1 = -ZETA1 + CFN*(CFN/(Z+ZETA2)) + CMPLX(0.0E0,YY) + GO TO 50 + 40 CONTINUE + S1 = -ZETA1 + ZETA2 + 50 CONTINUE +C----------------------------------------------------------------------- +C TEST FOR UNDERFLOW AND OVERFLOW 
+C----------------------------------------------------------------------- + RS1 = REAL(S1) + IF (ABS(RS1).GT.ELIM) GO TO 110 + IF (I.EQ.1) IFLAG = 2 + IF (ABS(RS1).LT.ALIM) GO TO 60 +C----------------------------------------------------------------------- +C REFINE TEST AND SCALE +C----------------------------------------------------------------------- + APHI = CABS(PHI) + RS1 = RS1 + ALOG(APHI) + IF (ABS(RS1).GT.ELIM) GO TO 110 + IF (I.EQ.1) IFLAG = 1 + IF (RS1.LT.0.0E0) GO TO 60 + IF (I.EQ.1) IFLAG = 3 + 60 CONTINUE +C----------------------------------------------------------------------- +C SCALE EXP(S1) BY CSS(IFLAG) TO KEEP INTERMEDIATE ARITHMETIC ON SCALE +C----------------------------------------------------------------------- + S2 = PHI*SUM + C2R = REAL(S1) + C2I = AIMAG(S1) + C2M = EXP(C2R)*REAL(CSS(IFLAG)) + S1 = CMPLX(C2M,0.0E0)*CMPLX(COS(C2I),SIN(C2I)) + S2 = S2*S1 + IF (IFLAG.NE.1) GO TO 70 + CALL CUCHK(S2, NW, BRY(1), TOL) + IF (NW.NE.0) GO TO 110 + 70 CONTINUE + M = ND - I + 1 + CY(I) = S2 + Y(M) = S2*CSR(IFLAG) + 80 CONTINUE + IF (ND.LE.2) GO TO 100 + RZ = CMPLX(2.0E0,0.0E0)/Z + BRY(2) = 1.0E0/BRY(1) + BRY(3) = R1MACH(2) + S1 = CY(1) + S2 = CY(2) + C1 = CSR(IFLAG) + ASCLE = BRY(IFLAG) + K = ND - 2 + FN = FLOAT(K) + DO 90 I=3,ND + C2 = S2 + S2 = S1 + CMPLX(FNU+FN,0.0E0)*RZ*S2 + S1 = C2 + C2 = S2*C1 + Y(K) = C2 + K = K - 1 + FN = FN - 1.0E0 + IF (IFLAG.GE.3) GO TO 90 + C2R = REAL(C2) + C2I = AIMAG(C2) + C2R = ABS(C2R) + C2I = ABS(C2I) + C2M = AMAX1(C2R,C2I) + IF (C2M.LE.ASCLE) GO TO 90 + IFLAG = IFLAG + 1 + ASCLE = BRY(IFLAG) + S1 = S1*C1 + S2 = C2 + S1 = S1*CSS(IFLAG) + S2 = S2*CSS(IFLAG) + C1 = CSR(IFLAG) + 90 CONTINUE + 100 CONTINUE + RETURN +C----------------------------------------------------------------------- +C SET UNDERFLOW AND UPDATE PARAMETERS +C----------------------------------------------------------------------- + 110 CONTINUE + IF (RS1.GT.0.0E0) GO TO 120 + Y(ND) = CZERO + NZ = NZ + 1 + ND = ND - 1 + IF (ND.EQ.0) GO TO 100 + CALL CUOIK(Z, FNU, KODE, 1, ND, Y, NUF, TOL,
ELIM, ALIM) + IF (NUF.LT.0) GO TO 120 + ND = ND - NUF + NZ = NZ + NUF + IF (ND.EQ.0) GO TO 100 + FN = FNU + FLOAT(ND-1) + IF (FN.GE.FNUL) GO TO 30 + NLAST = ND + RETURN + 120 CONTINUE + NZ = -1 + RETURN + 130 CONTINUE + IF (RS1.GT.0.0E0) GO TO 120 + NZ = N + DO 140 I=1,N + Y(I) = CZERO + 140 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cuni2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cuni2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,215 @@ + SUBROUTINE CUNI2(Z, FNU, KODE, N, Y, NZ, NLAST, FNUL, TOL, ELIM, + * ALIM) +C***BEGIN PROLOGUE CUNI2 +C***REFER TO CBESI,CBESK +C +C CUNI2 COMPUTES I(FNU,Z) IN THE RIGHT HALF PLANE BY MEANS OF +C UNIFORM ASYMPTOTIC EXPANSION FOR J(FNU,ZN) WHERE ZN IS Z*I +C OR -Z*I AND ZN IS IN THE RIGHT HALF PLANE ALSO. +C +C FNUL IS THE SMALLEST ORDER PERMITTED FOR THE ASYMPTOTIC +C EXPANSION. NLAST=0 MEANS ALL OF THE Y VALUES WERE SET. +C NLAST.NE.0 IS THE NUMBER LEFT TO BE COMPUTED BY ANOTHER +C FORMULA FOR ORDERS FNU TO FNU+NLAST-1 BECAUSE FNU+NLAST-1.LT.FNUL. 
+C Y(I)=CZERO FOR I=NLAST+1,N +C +C***ROUTINES CALLED CAIRY,CUCHK,CUNHJ,CUOIK,R1MACH +C***END PROLOGUE CUNI2 + COMPLEX AI, ARG, ASUM, BSUM, CFN, CI, CID, CIP, CONE, CRSC, CSCL, + * CSR, CSS, CY, CZERO, C1, C2, DAI, PHI, RZ, S1, S2, Y, Z, ZB, + * ZETA1, ZETA2, ZN, ZAR + REAL AARG, AIC, ALIM, ANG, APHI, ASCLE, AY, BRY, CAR, C2I, C2M, + * C2R, ELIM, FN, FNU, FNUL, HPI, RS1, SAR, TOL, YY, R1MACH + INTEGER I, IFLAG, IN, INU, J, K, KODE, N, NAI, ND, NDAI, NLAST, + * NN, NUF, NW, NZ, IDUM + DIMENSION BRY(3), Y(N), CIP(4), CSS(3), CSR(3), CY(2) + DATA CZERO,CONE,CI/(0.0E0,0.0E0),(1.0E0,0.0E0),(0.0E0,1.0E0)/ + DATA CIP(1),CIP(2),CIP(3),CIP(4)/ + 1 (1.0E0,0.0E0), (0.0E0,1.0E0), (-1.0E0,0.0E0), (0.0E0,-1.0E0)/ + DATA HPI, AIC / + 1 1.57079632679489662E+00, 1.265512123484645396E+00/ +C + NZ = 0 + ND = N + NLAST = 0 +C----------------------------------------------------------------------- +C COMPUTED VALUES WITH EXPONENTS BETWEEN ALIM AND ELIM IN MAG- +C NITUDE ARE SCALED TO KEEP INTERMEDIATE ARITHMETIC ON SCALE, +C EXP(ALIM)=EXP(ELIM)*TOL +C----------------------------------------------------------------------- + CSCL = CMPLX(1.0E0/TOL,0.0E0) + CRSC = CMPLX(TOL,0.0E0) + CSS(1) = CSCL + CSS(2) = CONE + CSS(3) = CRSC + CSR(1) = CRSC + CSR(2) = CONE + CSR(3) = CSCL + BRY(1) = 1.0E+3*R1MACH(1)/TOL + YY = AIMAG(Z) +C----------------------------------------------------------------------- +C ZN IS IN THE RIGHT HALF PLANE AFTER ROTATION BY CI OR -CI +C----------------------------------------------------------------------- + ZN = -Z*CI + ZB = Z + CID = -CI + INU = INT(FNU) + ANG = HPI*(FNU-FLOAT(INU)) + CAR = COS(ANG) + SAR = SIN(ANG) + C2 = CMPLX(CAR,SAR) + ZAR = C2 + IN = INU + N - 1 + IN = MOD(IN,4) + C2 = C2*CIP(IN+1) + IF (YY.GT.0.0E0) GO TO 10 + ZN = CONJG(-ZN) + ZB = CONJG(ZB) + CID = -CID + C2 = CONJG(C2) + 10 CONTINUE +C----------------------------------------------------------------------- +C CHECK FOR UNDERFLOW AND OVERFLOW ON FIRST MEMBER 
+C----------------------------------------------------------------------- + FN = AMAX1(FNU,1.0E0) + CALL CUNHJ(ZN, FN, 1, TOL, PHI, ARG, ZETA1, ZETA2, ASUM, BSUM) + IF (KODE.EQ.1) GO TO 20 + CFN = CMPLX(FNU,0.0E0) + S1 = -ZETA1 + CFN*(CFN/(ZB+ZETA2)) + GO TO 30 + 20 CONTINUE + S1 = -ZETA1 + ZETA2 + 30 CONTINUE + RS1 = REAL(S1) + IF (ABS(RS1).GT.ELIM) GO TO 150 + 40 CONTINUE + NN = MIN0(2,ND) + DO 90 I=1,NN + FN = FNU + FLOAT(ND-I) + CALL CUNHJ(ZN, FN, 0, TOL, PHI, ARG, ZETA1, ZETA2, ASUM, BSUM) + IF (KODE.EQ.1) GO TO 50 + CFN = CMPLX(FN,0.0E0) + AY = ABS(YY) + S1 = -ZETA1 + CFN*(CFN/(ZB+ZETA2)) + CMPLX(0.0E0,AY) + GO TO 60 + 50 CONTINUE + S1 = -ZETA1 + ZETA2 + 60 CONTINUE +C----------------------------------------------------------------------- +C TEST FOR UNDERFLOW AND OVERFLOW +C----------------------------------------------------------------------- + RS1 = REAL(S1) + IF (ABS(RS1).GT.ELIM) GO TO 120 + IF (I.EQ.1) IFLAG = 2 + IF (ABS(RS1).LT.ALIM) GO TO 70 +C----------------------------------------------------------------------- +C REFINE TEST AND SCALE +C NOTE: AIC = ALOG(2.0*SQRT(PI)), THE AIRY NORMALIZATION CONSTANT +C----------------------------------------------------------------------- + APHI = CABS(PHI) + AARG = CABS(ARG) + RS1 = RS1 + ALOG(APHI) - 0.25E0*ALOG(AARG) - AIC + IF (ABS(RS1).GT.ELIM) GO TO 120 + IF (I.EQ.1) IFLAG = 1 + IF (RS1.LT.0.0E0) GO TO 70 + IF (I.EQ.1) IFLAG = 3 + 70 CONTINUE +C----------------------------------------------------------------------- +C SCALE S1 TO KEEP INTERMEDIATE ARITHMETIC ON SCALE NEAR +C EXPONENT EXTREMES +C----------------------------------------------------------------------- + CALL CAIRY(ARG, 0, 2, AI, NAI, IDUM) + CALL CAIRY(ARG, 1, 2, DAI, NDAI, IDUM) + S2 = PHI*(AI*ASUM+DAI*BSUM) + C2R = REAL(S1) + C2I = AIMAG(S1) + C2M = EXP(C2R)*REAL(CSS(IFLAG)) + S1 = CMPLX(C2M,0.0E0)*CMPLX(COS(C2I),SIN(C2I)) + S2 = S2*S1 + IF (IFLAG.NE.1) GO TO 80 + CALL CUCHK(S2, NW, BRY(1), TOL) + IF (NW.NE.0) GO TO 120 + 80
CONTINUE + IF (YY.LE.0.0E0) S2 = CONJG(S2) + J = ND - I + 1 + S2 = S2*C2 + CY(I) = S2 + Y(J) = S2*CSR(IFLAG) + C2 = C2*CID + 90 CONTINUE + IF (ND.LE.2) GO TO 110 + RZ = CMPLX(2.0E0,0.0E0)/Z + BRY(2) = 1.0E0/BRY(1) + BRY(3) = R1MACH(2) + S1 = CY(1) + S2 = CY(2) + C1 = CSR(IFLAG) + ASCLE = BRY(IFLAG) + K = ND - 2 + FN = FLOAT(K) + DO 100 I=3,ND + C2 = S2 + S2 = S1 + CMPLX(FNU+FN,0.0E0)*RZ*S2 + S1 = C2 + C2 = S2*C1 + Y(K) = C2 + K = K - 1 + FN = FN - 1.0E0 + IF (IFLAG.GE.3) GO TO 100 + C2R = REAL(C2) + C2I = AIMAG(C2) + C2R = ABS(C2R) + C2I = ABS(C2I) + C2M = AMAX1(C2R,C2I) + IF (C2M.LE.ASCLE) GO TO 100 + IFLAG = IFLAG + 1 + ASCLE = BRY(IFLAG) + S1 = S1*C1 + S2 = C2 + S1 = S1*CSS(IFLAG) + S2 = S2*CSS(IFLAG) + C1 = CSR(IFLAG) + 100 CONTINUE + 110 CONTINUE + RETURN + 120 CONTINUE + IF (RS1.GT.0.0E0) GO TO 140 +C----------------------------------------------------------------------- +C SET UNDERFLOW AND UPDATE PARAMETERS +C----------------------------------------------------------------------- + Y(ND) = CZERO + NZ = NZ + 1 + ND = ND - 1 + IF (ND.EQ.0) GO TO 110 + CALL CUOIK(Z, FNU, KODE, 1, ND, Y, NUF, TOL, ELIM, ALIM) + IF (NUF.LT.0) GO TO 140 + ND = ND - NUF + NZ = NZ + NUF + IF (ND.EQ.0) GO TO 110 + FN = FNU + FLOAT(ND-1) + IF (FN.LT.FNUL) GO TO 130 +C FN = AIMAG(CID) +C J = NUF + 1 +C K = MOD(J,4) + 1 +C S1 = CIP(K) +C IF (FN.LT.0.0E0) S1 = CONJG(S1) +C C2 = C2*S1 + IN = INU + ND - 1 + IN = MOD(IN,4) + 1 + C2 = ZAR*CIP(IN) + IF (YY.LE.0.0E0)C2=CONJG(C2) + GO TO 40 + 130 CONTINUE + NLAST = ND + RETURN + 140 CONTINUE + NZ = -1 + RETURN + 150 CONTINUE + IF (RS1.GT.0.0E0) GO TO 140 + NZ = N + DO 160 I=1,N + Y(I) = CZERO + 160 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cunik.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cunik.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,188 @@ + SUBROUTINE CUNIK(ZR, FNU, IKFLG, IPMTR, TOL, INIT, PHI, ZETA1, + * ZETA2, SUM, CWRK) +C***BEGIN PROLOGUE CUNIK +C***REFER TO CBESI,CBESK +C +C 
CUNIK COMPUTES PARAMETERS FOR THE UNIFORM ASYMPTOTIC +C EXPANSIONS OF THE I AND K FUNCTIONS ON IKFLG= 1 OR 2 +C RESPECTIVELY BY +C +C W(FNU,ZR) = PHI*EXP(ZETA)*SUM +C +C WHERE ZETA=-ZETA1 + ZETA2 OR +C ZETA1 - ZETA2 +C +C THE FIRST CALL MUST HAVE INIT=0. SUBSEQUENT CALLS WITH THE +C SAME ZR AND FNU WILL RETURN THE I OR K FUNCTION ON IKFLG= +C 1 OR 2 WITH NO CHANGE IN INIT. CWRK IS A COMPLEX WORK +C ARRAY. IPMTR=0 COMPUTES ALL PARAMETERS. IPMTR=1 COMPUTES PHI, +C ZETA1,ZETA2. +C +C***ROUTINES CALLED (NONE) +C***END PROLOGUE CUNIK + COMPLEX CFN, CON, CONE, CRFN, CWRK, CZERO, PHI, S, SR, SUM, T, + * T2, ZETA1, ZETA2, ZN, ZR + REAL AC, C, FNU, RFN, TEST, TOL, TSTR, TSTI + INTEGER I, IKFLG, INIT, IPMTR, J, K, L + DIMENSION C(120), CWRK(16), CON(2) + DATA CZERO, CONE / (0.0E0,0.0E0), (1.0E0,0.0E0) / + DATA CON(1), CON(2) / + 1(3.98942280401432678E-01,0.0E0),(1.25331413731550025E+00,0.0E0)/ + DATA C(1), C(2), C(3), C(4), C(5), C(6), C(7), C(8), C(9), C(10), + 1 C(11), C(12), C(13), C(14), C(15), C(16), C(17), C(18), + 2 C(19), C(20), C(21), C(22), C(23), C(24)/ + 3 1.00000000000000000E+00, -2.08333333333333333E-01, + 4 1.25000000000000000E-01, 3.34201388888888889E-01, + 5 -4.01041666666666667E-01, 7.03125000000000000E-02, + 6 -1.02581259645061728E+00, 1.84646267361111111E+00, + 7 -8.91210937500000000E-01, 7.32421875000000000E-02, + 8 4.66958442342624743E+00, -1.12070026162229938E+01, + 9 8.78912353515625000E+00, -2.36408691406250000E+00, + A 1.12152099609375000E-01, -2.82120725582002449E+01, + B 8.46362176746007346E+01, -9.18182415432400174E+01, + C 4.25349987453884549E+01, -7.36879435947963170E+00, + D 2.27108001708984375E-01, 2.12570130039217123E+02, + E -7.65252468141181642E+02, 1.05999045252799988E+03/ + DATA C(25), C(26), C(27), C(28), C(29), C(30), C(31), C(32), + 1 C(33), C(34), C(35), C(36), C(37), C(38), C(39), C(40), + 2 C(41), C(42), C(43), C(44), C(45), C(46), C(47), C(48)/ + 3 -6.99579627376132541E+02, 2.18190511744211590E+02, + 4 -2.64914304869515555E+01, 
5.72501420974731445E-01, + 5 -1.91945766231840700E+03, 8.06172218173730938E+03, + 6 -1.35865500064341374E+04, 1.16553933368645332E+04, + 7 -5.30564697861340311E+03, 1.20090291321635246E+03, + 8 -1.08090919788394656E+02, 1.72772750258445740E+00, + 9 2.02042913309661486E+04, -9.69805983886375135E+04, + A 1.92547001232531532E+05, -2.03400177280415534E+05, + B 1.22200464983017460E+05, -4.11926549688975513E+04, + C 7.10951430248936372E+03, -4.93915304773088012E+02, + D 6.07404200127348304E+00, -2.42919187900551333E+05, + E 1.31176361466297720E+06, -2.99801591853810675E+06/ + DATA C(49), C(50), C(51), C(52), C(53), C(54), C(55), C(56), + 1 C(57), C(58), C(59), C(60), C(61), C(62), C(63), C(64), + 2 C(65), C(66), C(67), C(68), C(69), C(70), C(71), C(72)/ + 3 3.76327129765640400E+06, -2.81356322658653411E+06, + 4 1.26836527332162478E+06, -3.31645172484563578E+05, + 5 4.52187689813627263E+04, -2.49983048181120962E+03, + 6 2.43805296995560639E+01, 3.28446985307203782E+06, + 7 -1.97068191184322269E+07, 5.09526024926646422E+07, + 8 -7.41051482115326577E+07, 6.63445122747290267E+07, + 9 -3.75671766607633513E+07, 1.32887671664218183E+07, + A -2.78561812808645469E+06, 3.08186404612662398E+05, + B -1.38860897537170405E+04, 1.10017140269246738E+02, + C -4.93292536645099620E+07, 3.25573074185765749E+08, + D -9.39462359681578403E+08, 1.55359689957058006E+09, + E -1.62108055210833708E+09, 1.10684281682301447E+09/ + DATA C(73), C(74), C(75), C(76), C(77), C(78), C(79), C(80), + 1 C(81), C(82), C(83), C(84), C(85), C(86), C(87), C(88), + 2 C(89), C(90), C(91), C(92), C(93), C(94), C(95), C(96)/ + 3 -4.95889784275030309E+08, 1.42062907797533095E+08, + 4 -2.44740627257387285E+07, 2.24376817792244943E+06, + 5 -8.40054336030240853E+04, 5.51335896122020586E+02, + 6 8.14789096118312115E+08, -5.86648149205184723E+09, + 7 1.86882075092958249E+10, -3.46320433881587779E+10, + 8 4.12801855797539740E+10, -3.30265997498007231E+10, + 9 1.79542137311556001E+10, -6.56329379261928433E+09, + A 
1.55927986487925751E+09, -2.25105661889415278E+08, + B 1.73951075539781645E+07, -5.49842327572288687E+05, + C 3.03809051092238427E+03, -1.46792612476956167E+10, + D 1.14498237732025810E+11, -3.99096175224466498E+11, + E 8.19218669548577329E+11, -1.09837515608122331E+12/ + DATA C(97), C(98), C(99), C(100), C(101), C(102), C(103), C(104), + 1 C(105), C(106), C(107), C(108), C(109), C(110), C(111), + 2 C(112), C(113), C(114), C(115), C(116), C(117), C(118)/ + 3 1.00815810686538209E+12, -6.45364869245376503E+11, + 4 2.87900649906150589E+11, -8.78670721780232657E+10, + 5 1.76347306068349694E+10, -2.16716498322379509E+09, + 6 1.43157876718888981E+08, -3.87183344257261262E+06, + 7 1.82577554742931747E+04, 2.86464035717679043E+11, + 8 -2.40629790002850396E+12, 9.10934118523989896E+12, + 9 -2.05168994109344374E+13, 3.05651255199353206E+13, + A -3.16670885847851584E+13, 2.33483640445818409E+13, + B -1.23204913055982872E+13, 4.61272578084913197E+12, + C -1.19655288019618160E+12, 2.05914503232410016E+11, + D -2.18229277575292237E+10, 1.24700929351271032E+09/ + DATA C(119), C(120)/ + 1 -2.91883881222208134E+07, 1.18838426256783253E+05/ +C + IF (INIT.NE.0) GO TO 40 +C----------------------------------------------------------------------- +C INITIALIZE ALL VARIABLES +C----------------------------------------------------------------------- + RFN = 1.0E0/FNU + CRFN = CMPLX(RFN,0.0E0) +C T = ZR*CRFN +C----------------------------------------------------------------------- +C OVERFLOW TEST (ZR/FNU TOO SMALL) +C----------------------------------------------------------------------- + TSTR = REAL(ZR) + TSTI = AIMAG(ZR) + TEST = R1MACH(1)*1.0E+3 + AC = FNU*TEST + IF (ABS(TSTR).GT.AC .OR. 
ABS(TSTI).GT.AC) GO TO 15 + AC = 2.0E0*ABS(ALOG(TEST))+FNU + ZETA1 = CMPLX(AC,0.0E0) + ZETA2 = CMPLX(FNU,0.0E0) + PHI=CONE + RETURN + 15 CONTINUE + T=ZR*CRFN + S = CONE + T*T + SR = CSQRT(S) + CFN = CMPLX(FNU,0.0E0) + ZN = (CONE+SR)/T + ZETA1 = CFN*CLOG(ZN) + ZETA2 = CFN*SR + T = CONE/SR + SR = T*CRFN + CWRK(16) = CSQRT(SR) + PHI = CWRK(16)*CON(IKFLG) + IF (IPMTR.NE.0) RETURN + T2 = CONE/S + CWRK(1) = CONE + CRFN = CONE + AC = 1.0E0 + L = 1 + DO 20 K=2,15 + S = CZERO + DO 10 J=1,K + L = L + 1 + S = S*T2 + CMPLX(C(L),0.0E0) + 10 CONTINUE + CRFN = CRFN*SR + CWRK(K) = CRFN*S + AC = AC*RFN + TSTR = REAL(CWRK(K)) + TSTI = AIMAG(CWRK(K)) + TEST = ABS(TSTR) + ABS(TSTI) + IF (AC.LT.TOL .AND. TEST.LT.TOL) GO TO 30 + 20 CONTINUE + K = 15 + 30 CONTINUE + INIT = K + 40 CONTINUE + IF (IKFLG.EQ.2) GO TO 60 +C----------------------------------------------------------------------- +C COMPUTE SUM FOR THE I FUNCTION (ALL CWRK TERMS ADDED WITH PLUS SIGN) +C----------------------------------------------------------------------- + S = CZERO + DO 50 I=1,INIT + S = S + CWRK(I) + 50 CONTINUE + SUM = S + PHI = CWRK(16)*CON(1) + RETURN + 60 CONTINUE +C----------------------------------------------------------------------- +C COMPUTE SUM FOR THE K FUNCTION (CWRK TERMS ALTERNATE SIGN VIA T = -T) +C----------------------------------------------------------------------- + S = CZERO + T = CONE + DO 70 I=1,INIT + S = S + T*CWRK(I) + T = -T + 70 CONTINUE + SUM = S + PHI = CWRK(16)*CON(2) + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cunk1.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cunk1.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,343 @@ + SUBROUTINE CUNK1(Z, FNU, KODE, MR, N, Y, NZ, TOL, ELIM, ALIM) +C***BEGIN PROLOGUE CUNK1 +C***REFER TO CBESK +C +C CUNK1 COMPUTES K(FNU,Z) AND ITS ANALYTIC CONTINUATION FROM THE +C RIGHT HALF PLANE TO THE LEFT HALF PLANE BY MEANS OF THE +C UNIFORM ASYMPTOTIC EXPANSION. +C MR INDICATES THE DIRECTION OF ROTATION FOR ANALYTIC CONTINUATION.
+C NZ=-1 MEANS AN OVERFLOW WILL OCCUR +C +C***ROUTINES CALLED CS1S2,CUCHK,CUNIK,R1MACH +C***END PROLOGUE CUNK1 + COMPLEX CFN, CK, CONE, CRSC, CS, CSCL, CSGN, CSPN, CSR, CSS, + * CWRK, CY, CZERO, C1, C2, PHI, RZ, SUM, S1, S2, Y, Z, + * ZETA1, ZETA2, ZR, PHID, ZETA1D, ZETA2D, SUMD + REAL ALIM, ANG, APHI, ASC, ASCLE, BRY, CPN, C2I, C2M, C2R, ELIM, + * FMR, FN, FNF, FNU, PI, RS1, SGN, SPN, TOL, X, R1MACH + INTEGER I, IB, IFLAG, IFN, IL, INIT, INU, IUF, K, KDFLG, KFLAG, + * KK, KODE, MR, N, NW, NZ, J, IPARD, INITD, IC + DIMENSION BRY(3), INIT(2), Y(N), SUM(2), PHI(2), ZETA1(2), + * ZETA2(2), CY(2), CWRK(16,3), CSS(3), CSR(3) + DATA CZERO, CONE / (0.0E0,0.0E0) , (1.0E0,0.0E0) / + DATA PI / 3.14159265358979324E0 / +C + KDFLG = 1 + NZ = 0 +C----------------------------------------------------------------------- +C EXP(-ALIM)=EXP(-ELIM)/TOL=APPROX. ONE PRECISION GREATER THAN +C THE UNDERFLOW LIMIT +C----------------------------------------------------------------------- + CSCL = CMPLX(1.0E0/TOL,0.0E0) + CRSC = CMPLX(TOL,0.0E0) + CSS(1) = CSCL + CSS(2) = CONE + CSS(3) = CRSC + CSR(1) = CRSC + CSR(2) = CONE + CSR(3) = CSCL + BRY(1) = 1.0E+3*R1MACH(1)/TOL + BRY(2) = 1.0E0/BRY(1) + BRY(3) = R1MACH(2) + X = REAL(Z) + ZR = Z + IF (X.LT.0.0E0) ZR = -Z + J=2 + DO 70 I=1,N +C----------------------------------------------------------------------- +C J FLIP FLOPS BETWEEN 1 AND 2 IN J = 3 - J +C----------------------------------------------------------------------- + J = 3 - J + FN = FNU + FLOAT(I-1) + INIT(J) = 0 + CALL CUNIK(ZR, FN, 2, 0, TOL, INIT(J), PHI(J), ZETA1(J), + * ZETA2(J), SUM(J), CWRK(1,J)) + IF (KODE.EQ.1) GO TO 20 + CFN = CMPLX(FN,0.0E0) + S1 = ZETA1(J) - CFN*(CFN/(ZR+ZETA2(J))) + GO TO 30 + 20 CONTINUE + S1 = ZETA1(J) - ZETA2(J) + 30 CONTINUE +C----------------------------------------------------------------------- +C TEST FOR UNDERFLOW AND OVERFLOW +C----------------------------------------------------------------------- + RS1 = REAL(S1) + IF (ABS(RS1).GT.ELIM) GO TO 
60 + IF (KDFLG.EQ.1) KFLAG = 2 + IF (ABS(RS1).LT.ALIM) GO TO 40 +C----------------------------------------------------------------------- +C REFINE TEST AND SCALE +C----------------------------------------------------------------------- + APHI = CABS(PHI(J)) + RS1 = RS1 + ALOG(APHI) + IF (ABS(RS1).GT.ELIM) GO TO 60 + IF (KDFLG.EQ.1) KFLAG = 1 + IF (RS1.LT.0.0E0) GO TO 40 + IF (KDFLG.EQ.1) KFLAG = 3 + 40 CONTINUE +C----------------------------------------------------------------------- +C SCALE S1 TO KEEP INTERMEDIATE ARITHMETIC ON SCALE NEAR +C EXPONENT EXTREMES +C----------------------------------------------------------------------- + S2 = PHI(J)*SUM(J) + C2R = REAL(S1) + C2I = AIMAG(S1) + C2M = EXP(C2R)*REAL(CSS(KFLAG)) + S1 = CMPLX(C2M,0.0E0)*CMPLX(COS(C2I),SIN(C2I)) + S2 = S2*S1 + IF (KFLAG.NE.1) GO TO 50 + CALL CUCHK(S2, NW, BRY(1), TOL) + IF (NW.NE.0) GO TO 60 + 50 CONTINUE + CY(KDFLG) = S2 + Y(I) = S2*CSR(KFLAG) + IF (KDFLG.EQ.2) GO TO 75 + KDFLG = 2 + GO TO 70 + 60 CONTINUE + IF (RS1.GT.0.0E0) GO TO 290 +C----------------------------------------------------------------------- +C FOR X.LT.0.0, THE I FUNCTION TO BE ADDED WILL OVERFLOW +C----------------------------------------------------------------------- + IF (X.LT.0.0E0) GO TO 290 + KDFLG = 1 + Y(I) = CZERO + NZ=NZ+1 + IF (I.EQ.1) GO TO 70 + IF (Y(I-1).EQ.CZERO) GO TO 70 + Y(I-1) = CZERO + NZ=NZ+1 + 70 CONTINUE + I=N + 75 CONTINUE + RZ = CMPLX(2.0E0,0.0E0)/ZR + CK = CMPLX(FN,0.0E0)*RZ + IB = I+1 + IF (N.LT.IB) GO TO 160 +C----------------------------------------------------------------------- +C TEST LAST MEMBER FOR UNDERFLOW AND OVERFLOW, SET SEQUENCE TO ZERO +C ON UNDERFLOW +C----------------------------------------------------------------------- + FN = FNU+FLOAT(N-1) + IPARD = 1 + IF (MR.NE.0) IPARD = 0 + INITD = 0 + CALL CUNIK(ZR,FN,2,IPARD,TOL,INITD,PHID,ZETA1D,ZETA2D,SUMD, + *CWRK(1,3)) + IF (KODE.EQ.1) GO TO 80 + CFN=CMPLX(FN,0.0E0) + S1=ZETA1D-CFN*(CFN/(ZR+ZETA2D)) + GO TO 90 + 80 CONTINUE + 
S1=ZETA1D-ZETA2D + 90 CONTINUE + RS1=REAL(S1) + IF (ABS(RS1).GT.ELIM) GO TO 95 + IF (ABS(RS1).LT.ALIM) GO TO 100 +C----------------------------------------------------------------------- +C REFINE ESTIMATE AND TEST +C----------------------------------------------------------------------- + APHI=CABS(PHID) + RS1=RS1+ALOG(APHI) + IF (ABS(RS1).LT.ELIM) GO TO 100 + 95 CONTINUE + IF (RS1.GT.0.0E0) GO TO 290 +C----------------------------------------------------------------------- +C FOR X.LT.0.0, THE I FUNCTION TO BE ADDED WILL OVERFLOW +C----------------------------------------------------------------------- + IF (X.LT.0.0E0) GO TO 290 + NZ=N + DO 96 I=1,N + Y(I) = CZERO + 96 CONTINUE + RETURN + 100 CONTINUE +C----------------------------------------------------------------------- +C RECUR FORWARD FOR REMAINDER OF THE SEQUENCE +C----------------------------------------------------------------------- + S1 = CY(1) + S2 = CY(2) + C1 = CSR(KFLAG) + ASCLE = BRY(KFLAG) + DO 120 I=IB,N + C2 = S2 + S2 = CK*S2 + S1 + S1 = C2 + CK = CK + RZ + C2 = S2*C1 + Y(I) = C2 + IF (KFLAG.GE.3) GO TO 120 + C2R = REAL(C2) + C2I = AIMAG(C2) + C2R = ABS(C2R) + C2I = ABS(C2I) + C2M = AMAX1(C2R,C2I) + IF (C2M.LE.ASCLE) GO TO 120 + KFLAG = KFLAG + 1 + ASCLE = BRY(KFLAG) + S1 = S1*C1 + S2 = C2 + S1 = S1*CSS(KFLAG) + S2 = S2*CSS(KFLAG) + C1 = CSR(KFLAG) + 120 CONTINUE + 160 CONTINUE + IF (MR.EQ.0) RETURN +C----------------------------------------------------------------------- +C ANALYTIC CONTINUATION FOR RE(Z).LT.0.0E0 +C----------------------------------------------------------------------- + NZ = 0 + FMR = FLOAT(MR) + SGN = -SIGN(PI,FMR) +C----------------------------------------------------------------------- +C CSPN AND CSGN ARE COEFF OF K AND I FUNCTIONS RESP.
+C----------------------------------------------------------------------- + CSGN = CMPLX(0.0E0,SGN) + INU = INT(FNU) + FNF = FNU - FLOAT(INU) + IFN = INU + N - 1 + ANG = FNF*SGN + CPN = COS(ANG) + SPN = SIN(ANG) + CSPN = CMPLX(CPN,SPN) + IF (MOD(IFN,2).EQ.1) CSPN = -CSPN + ASC = BRY(1) + KK = N + IUF = 0 + KDFLG = 1 + IB = IB-1 + IC = IB-1 + DO 260 K=1,N + FN = FNU + FLOAT(KK-1) +C----------------------------------------------------------------------- +C LOGIC TO SORT OUT CASES WHOSE PARAMETERS WERE SET FOR THE K +C FUNCTION ABOVE +C----------------------------------------------------------------------- + M=3 + IF (N.GT.2) GO TO 175 + 170 CONTINUE + INITD = INIT(J) + PHID = PHI(J) + ZETA1D = ZETA1(J) + ZETA2D = ZETA2(J) + SUMD = SUM(J) + M = J + J = 3 - J + GO TO 180 + 175 CONTINUE + IF ((KK.EQ.N).AND.(IB.LT.N)) GO TO 180 + IF ((KK.EQ.IB).OR.(KK.EQ.IC)) GO TO 170 + INITD = 0 + 180 CONTINUE + CALL CUNIK(ZR, FN, 1, 0, TOL, INITD, PHID, ZETA1D, + * ZETA2D, SUMD, CWRK(1,M)) + IF (KODE.EQ.1) GO TO 190 + CFN = CMPLX(FN,0.0E0) + S1 = -ZETA1D + CFN*(CFN/(ZR+ZETA2D)) + GO TO 200 + 190 CONTINUE + S1 = -ZETA1D + ZETA2D + 200 CONTINUE +C----------------------------------------------------------------------- +C TEST FOR UNDERFLOW AND OVERFLOW +C----------------------------------------------------------------------- + RS1 = REAL(S1) + IF (ABS(RS1).GT.ELIM) GO TO 250 + IF (KDFLG.EQ.1) IFLAG = 2 + IF (ABS(RS1).LT.ALIM) GO TO 210 +C----------------------------------------------------------------------- +C REFINE TEST AND SCALE +C----------------------------------------------------------------------- + APHI = CABS(PHID) + RS1 = RS1 + ALOG(APHI) + IF (ABS(RS1).GT.ELIM) GO TO 250 + IF (KDFLG.EQ.1) IFLAG = 1 + IF (RS1.LT.0.0E0) GO TO 210 + IF (KDFLG.EQ.1) IFLAG = 3 + 210 CONTINUE + S2 = CSGN*PHID*SUMD + C2R = REAL(S1) + C2I = AIMAG(S1) + C2M = EXP(C2R)*REAL(CSS(IFLAG)) + S1 = CMPLX(C2M,0.0E0)*CMPLX(COS(C2I),SIN(C2I)) + S2 = S2*S1 + IF (IFLAG.NE.1) GO TO 220 + CALL CUCHK(S2, NW, BRY(1), 
TOL) + IF (NW.NE.0) S2 = CMPLX(0.0E0,0.0E0) + 220 CONTINUE + CY(KDFLG) = S2 + C2 = S2 + S2 = S2*CSR(IFLAG) +C----------------------------------------------------------------------- +C ADD I AND K FUNCTIONS, K SEQUENCE IN Y(I), I=1,N +C----------------------------------------------------------------------- + S1 = Y(KK) + IF (KODE.EQ.1) GO TO 240 + CALL CS1S2(ZR, S1, S2, NW, ASC, ALIM, IUF) + NZ = NZ + NW + 240 CONTINUE + Y(KK) = S1*CSPN + S2 + KK = KK - 1 + CSPN = -CSPN + IF (C2.NE.CZERO) GO TO 245 + KDFLG = 1 + GO TO 260 + 245 CONTINUE + IF (KDFLG.EQ.2) GO TO 265 + KDFLG = 2 + GO TO 260 + 250 CONTINUE + IF (RS1.GT.0.0E0) GO TO 290 + S2 = CZERO + GO TO 220 + 260 CONTINUE + K = N + 265 CONTINUE + IL = N - K + IF (IL.EQ.0) RETURN +C----------------------------------------------------------------------- +C RECUR BACKWARD FOR REMAINDER OF I SEQUENCE AND ADD IN THE +C K FUNCTIONS, SCALING THE I SEQUENCE DURING RECURRENCE TO KEEP +C INTERMEDIATE ARITHMETIC ON SCALE NEAR EXPONENT EXTREMES. +C----------------------------------------------------------------------- + S1 = CY(1) + S2 = CY(2) + CS = CSR(IFLAG) + ASCLE = BRY(IFLAG) + FN = FLOAT(INU+IL) + DO 280 I=1,IL + C2 = S2 + S2 = S1 + CMPLX(FN+FNF,0.0E0)*RZ*S2 + S1 = C2 + FN = FN - 1.0E0 + C2 = S2*CS + CK = C2 + C1 = Y(KK) + IF (KODE.EQ.1) GO TO 270 + CALL CS1S2(ZR, C1, C2, NW, ASC, ALIM, IUF) + NZ = NZ + NW + 270 CONTINUE + Y(KK) = C1*CSPN + C2 + KK = KK - 1 + CSPN = -CSPN + IF (IFLAG.GE.3) GO TO 280 + C2R = REAL(CK) + C2I = AIMAG(CK) + C2R = ABS(C2R) + C2I = ABS(C2I) + C2M = AMAX1(C2R,C2I) + IF (C2M.LE.ASCLE) GO TO 280 + IFLAG = IFLAG + 1 + ASCLE = BRY(IFLAG) + S1 = S1*CS + S2 = CK + S1 = S1*CSS(IFLAG) + S2 = S2*CSS(IFLAG) + CS = CSR(IFLAG) + 280 CONTINUE + RETURN + 290 CONTINUE + NZ = -1 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cunk2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cunk2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,393 @@ + SUBROUTINE CUNK2(Z, FNU, KODE, MR, N, 
Y, NZ, TOL, ELIM, ALIM) +C***BEGIN PROLOGUE CUNK2 +C***REFER TO CBESK +C +C CUNK2 COMPUTES K(FNU,Z) AND ITS ANALYTIC CONTINUATION FROM THE +C RIGHT HALF PLANE TO THE LEFT HALF PLANE BY MEANS OF THE +C UNIFORM ASYMPTOTIC EXPANSIONS FOR H(KIND,FNU,ZN) AND J(FNU,ZN) +C WHERE ZN IS IN THE RIGHT HALF PLANE, KIND=(3-MR)/2, MR=+1 OR +C -1. HERE ZN=ZR*I OR -ZR*I WHERE ZR=Z IF Z IS IN THE RIGHT +C HALF PLANE OR ZR=-Z IF Z IS IN THE LEFT HALF PLANE. MR INDIC- +C ATES THE DIRECTION OF ROTATION FOR ANALYTIC CONTINUATION. +C NZ=-1 MEANS AN OVERFLOW WILL OCCUR +C +C***ROUTINES CALLED CAIRY,CS1S2,CUCHK,CUNHJ,R1MACH +C***END PROLOGUE CUNK2 + COMPLEX AI, ARG, ASUM, BSUM, CFN, CI, CIP, + * CK, CONE, CRSC, CR1, CR2, CS, CSCL, CSGN, CSPN, CSR, CSS, CY, + * CZERO, C1, C2, DAI, PHI, RZ, S1, S2, Y, Z, ZB, ZETA1, + * ZETA2, ZN, ZR, PHID, ARGD, ZETA1D, ZETA2D, ASUMD, BSUMD + REAL AARG, AIC, ALIM, ANG, APHI, ASC, ASCLE, BRY, CAR, CPN, C2I, + * C2M, C2R, ELIM, FMR, FN, FNF, FNU, HPI, PI, RS1, SAR, SGN, SPN, + * TOL, X, YY, R1MACH + INTEGER I, IB, IFLAG, IFN, IL, IN, INU, IUF, K, KDFLG, KFLAG, KK, + * KODE, MR, N, NAI, NDAI, NW, NZ, IDUM, J, IPARD, IC + DIMENSION BRY(3), Y(N), ASUM(2), BSUM(2), PHI(2), ARG(2), + * ZETA1(2), ZETA2(2), CY(2), CIP(4), CSS(3), CSR(3) + DATA CZERO, CONE, CI, CR1, CR2 / + 1 (0.0E0,0.0E0),(1.0E0,0.0E0),(0.0E0,1.0E0), + 1(1.0E0,1.73205080756887729E0),(-0.5E0,-8.66025403784438647E-01)/ + DATA HPI, PI, AIC / + 1 1.57079632679489662E+00, 3.14159265358979324E+00, + 1 1.26551212348464539E+00/ + DATA CIP(1),CIP(2),CIP(3),CIP(4)/ + 1 (1.0E0,0.0E0), (0.0E0,-1.0E0), (-1.0E0,0.0E0), (0.0E0,1.0E0)/ +C + KDFLG = 1 + NZ = 0 +C----------------------------------------------------------------------- +C EXP(-ALIM)=EXP(-ELIM)/TOL=APPROX. 
ONE PRECISION GREATER THAN +C THE UNDERFLOW LIMIT +C----------------------------------------------------------------------- + CSCL = CMPLX(1.0E0/TOL,0.0E0) + CRSC = CMPLX(TOL,0.0E0) + CSS(1) = CSCL + CSS(2) = CONE + CSS(3) = CRSC + CSR(1) = CRSC + CSR(2) = CONE + CSR(3) = CSCL + BRY(1) = 1.0E+3*R1MACH(1)/TOL + BRY(2) = 1.0E0/BRY(1) + BRY(3) = R1MACH(2) + X = REAL(Z) + ZR = Z + IF (X.LT.0.0E0) ZR = -Z + YY = AIMAG(ZR) + ZN = -ZR*CI + ZB = ZR + INU = INT(FNU) + FNF = FNU - FLOAT(INU) + ANG = -HPI*FNF + CAR = COS(ANG) + SAR = SIN(ANG) + CPN = -HPI*CAR + SPN = -HPI*SAR + C2 = CMPLX(-SPN,CPN) + KK = MOD(INU,4) + 1 + CS = CR1*C2*CIP(KK) + IF (YY.GT.0.0E0) GO TO 10 + ZN = CONJG(-ZN) + ZB = CONJG(ZB) + 10 CONTINUE +C----------------------------------------------------------------------- +C K(FNU,Z) IS COMPUTED FROM H(2,FNU,-I*Z) WHERE Z IS IN THE FIRST +C QUADRANT. FOURTH QUADRANT VALUES (YY.LE.0.0E0) ARE COMPUTED BY +C CONJUGATION SINCE THE K FUNCTION IS REAL ON THE POSITIVE REAL AXIS +C----------------------------------------------------------------------- + J = 2 + DO 70 I=1,N +C----------------------------------------------------------------------- +C J FLIP FLOPS BETWEEN 1 AND 2 IN J = 3 - J +C----------------------------------------------------------------------- + J = 3 - J + FN = FNU + FLOAT(I-1) + CALL CUNHJ(ZN, FN, 0, TOL, PHI(J), ARG(J), ZETA1(J), ZETA2(J), + * ASUM(J), BSUM(J)) + IF (KODE.EQ.1) GO TO 20 + CFN = CMPLX(FN,0.0E0) + S1 = ZETA1(J) - CFN*(CFN/(ZB+ZETA2(J))) + GO TO 30 + 20 CONTINUE + S1 = ZETA1(J) - ZETA2(J) + 30 CONTINUE +C----------------------------------------------------------------------- +C TEST FOR UNDERFLOW AND OVERFLOW +C----------------------------------------------------------------------- + RS1 = REAL(S1) + IF (ABS(RS1).GT.ELIM) GO TO 60 + IF (KDFLG.EQ.1) KFLAG = 2 + IF (ABS(RS1).LT.ALIM) GO TO 40 +C----------------------------------------------------------------------- +C REFINE TEST AND SCALE 
+C----------------------------------------------------------------------- + APHI = CABS(PHI(J)) + AARG = CABS(ARG(J)) + RS1 = RS1 + ALOG(APHI) - 0.25E0*ALOG(AARG) - AIC + IF (ABS(RS1).GT.ELIM) GO TO 60 + IF (KDFLG.EQ.1) KFLAG = 1 + IF (RS1.LT.0.0E0) GO TO 40 + IF (KDFLG.EQ.1) KFLAG = 3 + 40 CONTINUE +C----------------------------------------------------------------------- +C SCALE S1 TO KEEP INTERMEDIATE ARITHMETIC ON SCALE NEAR +C EXPONENT EXTREMES +C----------------------------------------------------------------------- + C2 = ARG(J)*CR2 + CALL CAIRY(C2, 0, 2, AI, NAI, IDUM) + CALL CAIRY(C2, 1, 2, DAI, NDAI, IDUM) + S2 = CS*PHI(J)*(AI*ASUM(J)+CR2*DAI*BSUM(J)) + C2R = REAL(S1) + C2I = AIMAG(S1) + C2M = EXP(C2R)*REAL(CSS(KFLAG)) + S1 = CMPLX(C2M,0.0E0)*CMPLX(COS(C2I),SIN(C2I)) + S2 = S2*S1 + IF (KFLAG.NE.1) GO TO 50 + CALL CUCHK(S2, NW, BRY(1), TOL) + IF (NW.NE.0) GO TO 60 + 50 CONTINUE + IF (YY.LE.0.0E0) S2 = CONJG(S2) + CY(KDFLG) = S2 + Y(I) = S2*CSR(KFLAG) + CS = -CI*CS + IF (KDFLG.EQ.2) GO TO 75 + KDFLG = 2 + GO TO 70 + 60 CONTINUE + IF (RS1.GT.0.0E0) GO TO 300 +C----------------------------------------------------------------------- +C FOR X.LT.0.0, THE I FUNCTION TO BE ADDED WILL OVERFLOW +C----------------------------------------------------------------------- + IF (X.LT.0.0E0) GO TO 300 + KDFLG = 1 + Y(I) = CZERO + CS = -CI*CS + NZ=NZ+1 + IF (I.EQ.1) GO TO 70 + IF (Y(I-1).EQ.CZERO) GO TO 70 + Y(I-1) = CZERO + NZ=NZ+1 + 70 CONTINUE + I=N + 75 CONTINUE + RZ = CMPLX(2.0E0,0.0E0)/ZR + CK = CMPLX(FN,0.0E0)*RZ + IB = I + 1 + IF (N.LT.IB) GO TO 170 +C----------------------------------------------------------------------- +C TEST LAST MEMBER FOR UNDERFLOW AND OVERFLOW, SET SEQUENCE TO ZERO +C ON UNDERFLOW +C----------------------------------------------------------------------- + FN = FNU+FLOAT(N-1) + IPARD = 1 + IF (MR.NE.0) IPARD = 0 + CALL CUNHJ(ZN,FN,IPARD,TOL,PHID,ARGD,ZETA1D,ZETA2D,ASUMD,BSUMD) + IF (KODE.EQ.1) GO TO 80 + CFN=CMPLX(FN,0.0E0) + 
S1=ZETA1D-CFN*(CFN/(ZB+ZETA2D)) + GO TO 90 + 80 CONTINUE + S1=ZETA1D-ZETA2D + 90 CONTINUE + RS1=REAL(S1) + IF (ABS(RS1).GT.ELIM) GO TO 95 + IF (ABS(RS1).LT.ALIM) GO TO 100 +C----------------------------------------------------------------------- +C REFINE ESTIMATE AND TEST +C----------------------------------------------------------------------- + APHI=CABS(PHID) + AARG = CABS(ARGD) + RS1=RS1+ALOG(APHI)-0.25E0*ALOG(AARG)-AIC + IF (ABS(RS1).LT.ELIM) GO TO 100 + 95 CONTINUE + IF (RS1.GT.0.0E0) GO TO 300 +C----------------------------------------------------------------------- +C FOR X.LT.0.0, THE I FUNCTION TO BE ADDED WILL OVERFLOW +C----------------------------------------------------------------------- + IF (X.LT.0.0E0) GO TO 300 + NZ=N + DO 96 I=1,N + Y(I) = CZERO + 96 CONTINUE + RETURN + 100 CONTINUE +C----------------------------------------------------------------------- +C SCALED FORWARD RECURRENCE FOR REMAINDER OF THE SEQUENCE +C----------------------------------------------------------------------- + S1 = CY(1) + S2 = CY(2) + C1 = CSR(KFLAG) + ASCLE = BRY(KFLAG) + DO 120 I=IB,N + C2 = S2 + S2 = CK*S2 + S1 + S1 = C2 + CK = CK + RZ + C2 = S2*C1 + Y(I) = C2 + IF (KFLAG.GE.3) GO TO 120 + C2R = REAL(C2) + C2I = AIMAG(C2) + C2R = ABS(C2R) + C2I = ABS(C2I) + C2M = AMAX1(C2R,C2I) + IF (C2M.LE.ASCLE) GO TO 120 + KFLAG = KFLAG + 1 + ASCLE = BRY(KFLAG) + S1 = S1*C1 + S2 = C2 + S1 = S1*CSS(KFLAG) + S2 = S2*CSS(KFLAG) + C1 = CSR(KFLAG) + 120 CONTINUE + 170 CONTINUE + IF (MR.EQ.0) RETURN +C----------------------------------------------------------------------- +C ANALYTIC CONTINUATION FOR RE(Z).LT.0.0E0 +C----------------------------------------------------------------------- + NZ = 0 + FMR = FLOAT(MR) + SGN = -SIGN(PI,FMR) +C----------------------------------------------------------------------- +C CSPN AND CSGN ARE COEFF OF K AND I FUNCTIONS RESP. 
+C----------------------------------------------------------------------- + CSGN = CMPLX(0.0E0,SGN) + IF (YY.LE.0.0E0) CSGN = CONJG(CSGN) + IFN = INU + N - 1 + ANG = FNF*SGN + CPN = COS(ANG) + SPN = SIN(ANG) + CSPN = CMPLX(CPN,SPN) + IF (MOD(IFN,2).EQ.1) CSPN = -CSPN +C----------------------------------------------------------------------- +C CS=COEFF OF THE J FUNCTION TO GET THE I FUNCTION. I(FNU,Z) IS +C COMPUTED FROM EXP(I*FNU*HPI)*J(FNU,-I*Z) WHERE Z IS IN THE FIRST +C QUADRANT. FOURTH QUADRANT VALUES (YY.LE.0.0E0) ARE COMPUTED BY +C CONJUGATION SINCE THE I FUNCTION IS REAL ON THE POSITIVE REAL AXIS +C----------------------------------------------------------------------- + CS = CMPLX(CAR,-SAR)*CSGN + IN = MOD(IFN,4) + 1 + C2 = CIP(IN) + CS = CS*CONJG(C2) + ASC = BRY(1) + KK = N + KDFLG = 1 + IB = IB-1 + IC = IB-1 + IUF = 0 + DO 270 K=1,N +C----------------------------------------------------------------------- +C LOGIC TO SORT OUT CASES WHOSE PARAMETERS WERE SET FOR THE K +C FUNCTION ABOVE +C----------------------------------------------------------------------- + FN = FNU+FLOAT(KK-1) + IF (N.GT.2) GO TO 180 + 175 CONTINUE + PHID = PHI(J) + ARGD = ARG(J) + ZETA1D = ZETA1(J) + ZETA2D = ZETA2(J) + ASUMD = ASUM(J) + BSUMD = BSUM(J) + J = 3 - J + GO TO 190 + 180 CONTINUE + IF ((KK.EQ.N).AND.(IB.LT.N)) GO TO 190 + IF ((KK.EQ.IB).OR.(KK.EQ.IC)) GO TO 175 + CALL CUNHJ(ZN, FN, 0, TOL, PHID, ARGD, ZETA1D, ZETA2D, + * ASUMD, BSUMD) + 190 CONTINUE + IF (KODE.EQ.1) GO TO 200 + CFN = CMPLX(FN,0.0E0) + S1 = -ZETA1D + CFN*(CFN/(ZB+ZETA2D)) + GO TO 210 + 200 CONTINUE + S1 = -ZETA1D + ZETA2D + 210 CONTINUE +C----------------------------------------------------------------------- +C TEST FOR UNDERFLOW AND OVERFLOW +C----------------------------------------------------------------------- + RS1 = REAL(S1) + IF (ABS(RS1).GT.ELIM) GO TO 260 + IF (KDFLG.EQ.1) IFLAG = 2 + IF (ABS(RS1).LT.ALIM) GO TO 220 +C----------------------------------------------------------------------- +C 
REFINE TEST AND SCALE +C----------------------------------------------------------------------- + APHI = CABS(PHID) + AARG = CABS(ARGD) + RS1 = RS1 + ALOG(APHI) - 0.25E0*ALOG(AARG) - AIC + IF (ABS(RS1).GT.ELIM) GO TO 260 + IF (KDFLG.EQ.1) IFLAG = 1 + IF (RS1.LT.0.0E0) GO TO 220 + IF (KDFLG.EQ.1) IFLAG = 3 + 220 CONTINUE + CALL CAIRY(ARGD, 0, 2, AI, NAI, IDUM) + CALL CAIRY(ARGD, 1, 2, DAI, NDAI, IDUM) + S2 = CS*PHID*(AI*ASUMD+DAI*BSUMD) + C2R = REAL(S1) + C2I = AIMAG(S1) + C2M = EXP(C2R)*REAL(CSS(IFLAG)) + S1 = CMPLX(C2M,0.0E0)*CMPLX(COS(C2I),SIN(C2I)) + S2 = S2*S1 + IF (IFLAG.NE.1) GO TO 230 + CALL CUCHK(S2, NW, BRY(1), TOL) + IF (NW.NE.0) S2 = CMPLX(0.0E0,0.0E0) + 230 CONTINUE + IF (YY.LE.0.0E0) S2 = CONJG(S2) + CY(KDFLG) = S2 + C2 = S2 + S2 = S2*CSR(IFLAG) +C----------------------------------------------------------------------- +C ADD I AND K FUNCTIONS, K SEQUENCE IN Y(I), I=1,N +C----------------------------------------------------------------------- + S1 = Y(KK) + IF (KODE.EQ.1) GO TO 250 + CALL CS1S2(ZR, S1, S2, NW, ASC, ALIM, IUF) + NZ = NZ + NW + 250 CONTINUE + Y(KK) = S1*CSPN + S2 + KK = KK - 1 + CSPN = -CSPN + CS = -CS*CI + IF (C2.NE.CZERO) GO TO 255 + KDFLG = 1 + GO TO 270 + 255 CONTINUE + IF (KDFLG.EQ.2) GO TO 275 + KDFLG = 2 + GO TO 270 + 260 CONTINUE + IF (RS1.GT.0.0E0) GO TO 300 + S2 = CZERO + GO TO 230 + 270 CONTINUE + K = N + 275 CONTINUE + IL = N-K + IF (IL.EQ.0) RETURN +C----------------------------------------------------------------------- +C RECUR BACKWARD FOR REMAINDER OF I SEQUENCE AND ADD IN THE +C K FUNCTIONS, SCALING THE I SEQUENCE DURING RECURRENCE TO KEEP +C INTERMEDIATE ARITHMETIC ON SCALE NEAR EXPONENT EXTREMES. 
+C----------------------------------------------------------------------- + S1 = CY(1) + S2 = CY(2) + CS = CSR(IFLAG) + ASCLE = BRY(IFLAG) + FN = FLOAT(INU+IL) + DO 290 I=1,IL + C2 = S2 + S2 = S1 + CMPLX(FN+FNF,0.0E0)*RZ*S2 + S1 = C2 + FN = FN - 1.0E0 + C2 = S2*CS + CK = C2 + C1 = Y(KK) + IF (KODE.EQ.1) GO TO 280 + CALL CS1S2(ZR, C1, C2, NW, ASC, ALIM, IUF) + NZ = NZ + NW + 280 CONTINUE + Y(KK) = C1*CSPN + C2 + KK = KK - 1 + CSPN = -CSPN + IF (IFLAG.GE.3) GO TO 290 + C2R = REAL(CK) + C2I = AIMAG(CK) + C2R = ABS(C2R) + C2I = ABS(C2I) + C2M = AMAX1(C2R,C2I) + IF (C2M.LE.ASCLE) GO TO 290 + IFLAG = IFLAG + 1 + ASCLE = BRY(IFLAG) + S1 = S1*CS + S2 = CK + S1 = S1*CSS(IFLAG) + S2 = S2*CSS(IFLAG) + CS = CSR(IFLAG) + 290 CONTINUE + RETURN + 300 CONTINUE + NZ = -1 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cuoik.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cuoik.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,159 @@ + SUBROUTINE CUOIK(Z, FNU, KODE, IKFLG, N, Y, NUF, TOL, ELIM, ALIM) +C***BEGIN PROLOGUE CUOIK +C***REFER TO CBESI,CBESK,CBESH +C +C CUOIK COMPUTES THE LEADING TERMS OF THE UNIFORM ASYMPTOTIC +C EXPANSIONS FOR THE I AND K FUNCTIONS AND COMPARES THEM +C (IN LOGARITHMIC FORM) TO ALIM AND ELIM FOR OVER AND UNDERFLOW +C WHERE ALIM.LT.ELIM. IF THE MAGNITUDE, BASED ON THE LEADING +C EXPONENTIAL, IS LESS THAN ALIM OR GREATER THAN -ALIM, THEN +C THE RESULT IS ON SCALE. IF NOT, THEN A REFINED TEST USING OTHER +C MULTIPLIERS (IN LOGARITHMIC FORM) IS MADE BASED ON ELIM. 
HERE +C EXP(-ELIM)=SMALLEST MACHINE NUMBER*1.0E+3 AND EXP(-ALIM)= +C EXP(-ELIM)/TOL +C +C IKFLG=1 MEANS THE I SEQUENCE IS TESTED +C =2 MEANS THE K SEQUENCE IS TESTED +C NUF = 0 MEANS THE LAST MEMBER OF THE SEQUENCE IS ON SCALE +C =-1 MEANS AN OVERFLOW WOULD OCCUR +C IKFLG=1 AND NUF.GT.0 MEANS THE LAST NUF Y VALUES WERE SET TO ZERO +C THE FIRST N-NUF VALUES MUST BE SET BY ANOTHER ROUTINE +C IKFLG=2 AND NUF.EQ.N MEANS ALL Y VALUES WERE SET TO ZERO +C IKFLG=2 AND 0.LT.NUF.LT.N NOT CONSIDERED. Y MUST BE SET BY +C ANOTHER ROUTINE +C +C***ROUTINES CALLED CUCHK,CUNHJ,CUNIK,R1MACH +C***END PROLOGUE CUOIK + COMPLEX ARG, ASUM, BSUM, CWRK, CZ, CZERO, PHI, SUM, Y, Z, ZB, + * ZETA1, ZETA2, ZN, ZR + REAL AARG, AIC, ALIM, APHI, ASCLE, AX, AY, ELIM, FNN, FNU, GNN, + * GNU, RCZ, TOL, X, YY + INTEGER I, IFORM, IKFLG, INIT, KODE, N, NN, NUF, NW + DIMENSION Y(N), CWRK(16) + DATA CZERO / (0.0E0,0.0E0) / + DATA AIC / 1.265512123484645396E+00 / + NUF = 0 + NN = N + X = REAL(Z) + ZR = Z + IF (X.LT.0.0E0) ZR = -Z + ZB = ZR + YY = AIMAG(ZR) + AX = ABS(X)*1.7321E0 + AY = ABS(YY) + IFORM = 1 + IF (AY.GT.AX) IFORM = 2 + GNU = AMAX1(FNU,1.0E0) + IF (IKFLG.EQ.1) GO TO 10 + FNN = FLOAT(NN) + GNN = FNU + FNN - 1.0E0 + GNU = AMAX1(GNN,FNN) + 10 CONTINUE +C----------------------------------------------------------------------- +C ONLY THE MAGNITUDE OF ARG AND PHI ARE NEEDED ALONG WITH THE +C REAL PARTS OF ZETA1, ZETA2 AND ZB. NO ATTEMPT IS MADE TO GET +C THE SIGN OF THE IMAGINARY PART CORRECT. 
+C----------------------------------------------------------------------- + IF (IFORM.EQ.2) GO TO 20 + INIT = 0 + CALL CUNIK(ZR, GNU, IKFLG, 1, TOL, INIT, PHI, ZETA1, ZETA2, SUM, + * CWRK) + CZ = -ZETA1 + ZETA2 + GO TO 40 + 20 CONTINUE + ZN = -ZR*CMPLX(0.0E0,1.0E0) + IF (YY.GT.0.0E0) GO TO 30 + ZN = CONJG(-ZN) + 30 CONTINUE + CALL CUNHJ(ZN, GNU, 1, TOL, PHI, ARG, ZETA1, ZETA2, ASUM, BSUM) + CZ = -ZETA1 + ZETA2 + AARG = CABS(ARG) + 40 CONTINUE + IF (KODE.EQ.2) CZ = CZ - ZB + IF (IKFLG.EQ.2) CZ = -CZ + APHI = CABS(PHI) + RCZ = REAL(CZ) +C----------------------------------------------------------------------- +C OVERFLOW TEST +C----------------------------------------------------------------------- + IF (RCZ.GT.ELIM) GO TO 170 + IF (RCZ.LT.ALIM) GO TO 50 + RCZ = RCZ + ALOG(APHI) + IF (IFORM.EQ.2) RCZ = RCZ - 0.25E0*ALOG(AARG) - AIC + IF (RCZ.GT.ELIM) GO TO 170 + GO TO 100 + 50 CONTINUE +C----------------------------------------------------------------------- +C UNDERFLOW TEST +C----------------------------------------------------------------------- + IF (RCZ.LT.(-ELIM)) GO TO 60 + IF (RCZ.GT.(-ALIM)) GO TO 100 + RCZ = RCZ + ALOG(APHI) + IF (IFORM.EQ.2) RCZ = RCZ - 0.25E0*ALOG(AARG) - AIC + IF (RCZ.GT.(-ELIM)) GO TO 80 + 60 CONTINUE + DO 70 I=1,NN + Y(I) = CZERO + 70 CONTINUE + NUF = NN + RETURN + 80 CONTINUE + ASCLE = 1.0E+3*R1MACH(1)/TOL + CZ = CZ + CLOG(PHI) + IF (IFORM.EQ.1) GO TO 90 + CZ = CZ - CMPLX(0.25E0,0.0E0)*CLOG(ARG) - CMPLX(AIC,0.0E0) + 90 CONTINUE + AX = EXP(RCZ)/TOL + AY = AIMAG(CZ) + CZ = CMPLX(AX,0.0E0)*CMPLX(COS(AY),SIN(AY)) + CALL CUCHK(CZ, NW, ASCLE, TOL) + IF (NW.EQ.1) GO TO 60 + 100 CONTINUE + IF (IKFLG.EQ.2) RETURN + IF (N.EQ.1) RETURN +C----------------------------------------------------------------------- +C SET UNDERFLOWS ON I SEQUENCE +C----------------------------------------------------------------------- + 110 CONTINUE + GNU = FNU + FLOAT(NN-1) + IF (IFORM.EQ.2) GO TO 120 + INIT = 0 + CALL CUNIK(ZR, GNU, IKFLG, 1, TOL, INIT, PHI, ZETA1, 
ZETA2, SUM, + * CWRK) + CZ = -ZETA1 + ZETA2 + GO TO 130 + 120 CONTINUE + CALL CUNHJ(ZN, GNU, 1, TOL, PHI, ARG, ZETA1, ZETA2, ASUM, BSUM) + CZ = -ZETA1 + ZETA2 + AARG = CABS(ARG) + 130 CONTINUE + IF (KODE.EQ.2) CZ = CZ - ZB + APHI = CABS(PHI) + RCZ = REAL(CZ) + IF (RCZ.LT.(-ELIM)) GO TO 140 + IF (RCZ.GT.(-ALIM)) RETURN + RCZ = RCZ + ALOG(APHI) + IF (IFORM.EQ.2) RCZ = RCZ - 0.25E0*ALOG(AARG) - AIC + IF (RCZ.GT.(-ELIM)) GO TO 150 + 140 CONTINUE + Y(NN) = CZERO + NN = NN - 1 + NUF = NUF + 1 + IF (NN.EQ.0) RETURN + GO TO 110 + 150 CONTINUE + ASCLE = 1.0E+3*R1MACH(1)/TOL + CZ = CZ + CLOG(PHI) + IF (IFORM.EQ.1) GO TO 160 + CZ = CZ - CMPLX(0.25E0,0.0E0)*CLOG(ARG) - CMPLX(AIC,0.0E0) + 160 CONTINUE + AX = EXP(RCZ)/TOL + AY = AIMAG(CZ) + CZ = CMPLX(AX,0.0E0)*CMPLX(COS(AY),SIN(AY)) + CALL CUCHK(CZ, NW, ASCLE, TOL) + IF (NW.EQ.1) GO TO 140 + RETURN + 170 CONTINUE + NUF = -1 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/cwrsk.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/cwrsk.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,75 @@ + SUBROUTINE CWRSK(ZR, FNU, KODE, N, Y, NZ, CW, TOL, ELIM, ALIM) +C***BEGIN PROLOGUE CWRSK +C***REFER TO CBESI,CBESK +C +C CWRSK COMPUTES THE I BESSEL FUNCTION FOR RE(Z).GE.0.0 BY +C NORMALIZING THE I FUNCTION RATIOS FROM CRATI BY THE WRONSKIAN +C +C***ROUTINES CALLED CBKNU,CRATI,R1MACH +C***END PROLOGUE CWRSK + COMPLEX CINU, CSCL, CT, CW, C1, C2, RCT, ST, Y, ZR + REAL ACT, ACW, ALIM, ASCLE, ELIM, FNU, S1, S2, TOL, YY + INTEGER I, KODE, N, NW, NZ + DIMENSION Y(N), CW(2) +C----------------------------------------------------------------------- +C I(FNU+I-1,Z) BY BACKWARD RECURRENCE FOR RATIOS +C Y(I)=I(FNU+I,Z)/I(FNU+I-1,Z) FROM CRATI NORMALIZED BY THE +C WRONSKIAN WITH K(FNU,Z) AND K(FNU+1,Z) FROM CBKNU. 
+C----------------------------------------------------------------------- + NZ = 0 + CALL CBKNU(ZR, FNU, KODE, 2, CW, NW, TOL, ELIM, ALIM) + IF (NW.NE.0) GO TO 50 + CALL CRATI(ZR, FNU, N, Y, TOL) +C----------------------------------------------------------------------- +C RECUR FORWARD ON I(FNU+1,Z) = R(FNU,Z)*I(FNU,Z), +C R(FNU+J-1,Z)=Y(J), J=1,...,N +C----------------------------------------------------------------------- + CINU = CMPLX(1.0E0,0.0E0) + IF (KODE.EQ.1) GO TO 10 + YY = AIMAG(ZR) + S1 = COS(YY) + S2 = SIN(YY) + CINU = CMPLX(S1,S2) + 10 CONTINUE +C----------------------------------------------------------------------- +C ON LOW EXPONENT MACHINES THE K FUNCTIONS CAN BE CLOSE TO BOTH +C THE UNDER AND OVERFLOW LIMITS AND THE NORMALIZATION MUST BE +C SCALED TO PREVENT OVER OR UNDERFLOW. CUOIK HAS DETERMINED THAT +C THE RESULT IS ON SCALE. +C----------------------------------------------------------------------- + ACW = CABS(CW(2)) + ASCLE = 1.0E+3*R1MACH(1)/TOL + CSCL = CMPLX(1.0E0,0.0E0) + IF (ACW.GT.ASCLE) GO TO 20 + CSCL = CMPLX(1.0E0/TOL,0.0E0) + GO TO 30 + 20 CONTINUE + ASCLE = 1.0E0/ASCLE + IF (ACW.LT.ASCLE) GO TO 30 + CSCL = CMPLX(TOL,0.0E0) + 30 CONTINUE + C1 = CW(1)*CSCL + C2 = CW(2)*CSCL + ST = Y(1) +C----------------------------------------------------------------------- +C CINU=CINU*(CONJG(CT)/CABS(CT))*(1.0E0/CABS(CT) PREVENTS +C UNDER- OR OVERFLOW PREMATURELY BY SQUARING CABS(CT) +C----------------------------------------------------------------------- + CT = ZR*(C2+ST*C1) + ACT = CABS(CT) + RCT = CMPLX(1.0E0/ACT,0.0E0) + CT = CONJG(CT)*RCT + CINU = CINU*RCT*CT + Y(1) = CINU*CSCL + IF (N.EQ.1) RETURN + DO 40 I=2,N + CINU = ST*CINU + ST = Y(I) + Y(I) = CINU*CSCL + 40 CONTINUE + RETURN + 50 CONTINUE + NZ = -1 + IF(NW.EQ.(-2)) NZ=-2 + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/amos/gamln.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/amos/gamln.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,189 @@ + FUNCTION 
GAMLN(Z,IERR) +C***BEGIN PROLOGUE GAMLN +C***DATE WRITTEN 830501 (YYMMDD) +C***REVISION DATE 830501 (YYMMDD) +C***CATEGORY NO. B5F +C***KEYWORDS GAMMA FUNCTION,LOGARITHM OF GAMMA FUNCTION +C***AUTHOR AMOS, DONALD E., SANDIA NATIONAL LABORATORIES +C***PURPOSE TO COMPUTE THE LOGARITHM OF THE GAMMA FUNCTION +C***DESCRIPTION +C +C GAMLN COMPUTES THE NATURAL LOG OF THE GAMMA FUNCTION FOR +C Z.GT.0. THE ASYMPTOTIC EXPANSION IS USED TO GENERATE VALUES +C GREATER THAN ZMIN WHICH ARE ADJUSTED BY THE RECURSION +C G(Z+1)=Z*G(Z) FOR Z.LE.ZMIN. THE FUNCTION WAS MADE AS +C PORTABLE AS POSSIBLE BY COMPUTING ZMIN FROM THE NUMBER OF BASE +C 10 DIGITS IN A WORD, RLN=AMAX1(-ALOG10(R1MACH(4)),0.5E-18) +C LIMITED TO 18 DIGITS OF (RELATIVE) ACCURACY. +C +C SINCE INTEGER ARGUMENTS ARE COMMON, A TABLE LOOK UP ON 100 +C VALUES IS USED FOR SPEED OF EXECUTION. +C +C DESCRIPTION OF ARGUMENTS +C +C INPUT +C Z - REAL ARGUMENT, Z.GT.0.0E0 +C +C OUTPUT +C GAMLN - NATURAL LOG OF THE GAMMA FUNCTION AT Z +C IERR - ERROR FLAG +C IERR=0, NORMAL RETURN, COMPUTATION COMPLETED +C IERR=1, Z.LE.0.0E0, NO COMPUTATION +C +C***REFERENCES COMPUTATION OF BESSEL FUNCTIONS OF COMPLEX ARGUMENT +C BY D. E. AMOS, SAND83-0083, MAY, 1983. 
+C***ROUTINES CALLED I1MACH,R1MACH +C***END PROLOGUE GAMLN +C + INTEGER I, I1M, K, MZ, NZ, IERR, I1MACH + REAL CF, CON, FLN, FZ, GLN, RLN, S, TLG, TRM, TST, T1, WDTOL, Z, + * ZDMY, ZINC, ZM, ZMIN, ZP, ZSQ + REAL R1MACH + DIMENSION CF(22), GLN(100) +C LNGAMMA(N), N=1,100 + DATA GLN(1), GLN(2), GLN(3), GLN(4), GLN(5), GLN(6), GLN(7), + 1 GLN(8), GLN(9), GLN(10), GLN(11), GLN(12), GLN(13), GLN(14), + 2 GLN(15), GLN(16), GLN(17), GLN(18), GLN(19), GLN(20), + 3 GLN(21), GLN(22)/ + 4 0.00000000000000000E+00, 0.00000000000000000E+00, + 5 6.93147180559945309E-01, 1.79175946922805500E+00, + 6 3.17805383034794562E+00, 4.78749174278204599E+00, + 7 6.57925121201010100E+00, 8.52516136106541430E+00, + 8 1.06046029027452502E+01, 1.28018274800814696E+01, + 9 1.51044125730755153E+01, 1.75023078458738858E+01, + A 1.99872144956618861E+01, 2.25521638531234229E+01, + B 2.51912211827386815E+01, 2.78992713838408916E+01, + C 3.06718601060806728E+01, 3.35050734501368889E+01, + D 3.63954452080330536E+01, 3.93398841871994940E+01, + E 4.23356164607534850E+01, 4.53801388984769080E+01/ + DATA GLN(23), GLN(24), GLN(25), GLN(26), GLN(27), GLN(28), + 1 GLN(29), GLN(30), GLN(31), GLN(32), GLN(33), GLN(34), + 2 GLN(35), GLN(36), GLN(37), GLN(38), GLN(39), GLN(40), + 3 GLN(41), GLN(42), GLN(43), GLN(44)/ + 4 4.84711813518352239E+01, 5.16066755677643736E+01, + 5 5.47847293981123192E+01, 5.80036052229805199E+01, + 6 6.12617017610020020E+01, 6.45575386270063311E+01, + 7 6.78897431371815350E+01, 7.12570389671680090E+01, + 8 7.46582363488301644E+01, 7.80922235533153106E+01, + 9 8.15579594561150372E+01, 8.50544670175815174E+01, + A 8.85808275421976788E+01, 9.21361756036870925E+01, + B 9.57196945421432025E+01, 9.93306124547874269E+01, + C 1.02968198614513813E+02, 1.06631760260643459E+02, + D 1.10320639714757395E+02, 1.14034211781461703E+02, + E 1.17771881399745072E+02, 1.21533081515438634E+02/ + DATA GLN(45), GLN(46), GLN(47), GLN(48), GLN(49), GLN(50), + 1 GLN(51), GLN(52), GLN(53), GLN(54), GLN(55), 
GLN(56), + 2 GLN(57), GLN(58), GLN(59), GLN(60), GLN(61), GLN(62), + 3 GLN(63), GLN(64), GLN(65), GLN(66)/ + 4 1.25317271149356895E+02, 1.29123933639127215E+02, + 5 1.32952575035616310E+02, 1.36802722637326368E+02, + 6 1.40673923648234259E+02, 1.44565743946344886E+02, + 7 1.48477766951773032E+02, 1.52409592584497358E+02, + 8 1.56360836303078785E+02, 1.60331128216630907E+02, + 9 1.64320112263195181E+02, 1.68327445448427652E+02, + A 1.72352797139162802E+02, 1.76395848406997352E+02, + B 1.80456291417543771E+02, 1.84533828861449491E+02, + C 1.88628173423671591E+02, 1.92739047287844902E+02, + D 1.96866181672889994E+02, 2.01009316399281527E+02, + E 2.05168199482641199E+02, 2.09342586752536836E+02/ + DATA GLN(67), GLN(68), GLN(69), GLN(70), GLN(71), GLN(72), + 1 GLN(73), GLN(74), GLN(75), GLN(76), GLN(77), GLN(78), + 2 GLN(79), GLN(80), GLN(81), GLN(82), GLN(83), GLN(84), + 3 GLN(85), GLN(86), GLN(87), GLN(88)/ + 4 2.13532241494563261E+02, 2.17736934113954227E+02, + 5 2.21956441819130334E+02, 2.26190548323727593E+02, + 6 2.30439043565776952E+02, 2.34701723442818268E+02, + 7 2.38978389561834323E+02, 2.43268849002982714E+02, + 8 2.47572914096186884E+02, 2.51890402209723194E+02, + 9 2.56221135550009525E+02, 2.60564940971863209E+02, + A 2.64921649798552801E+02, 2.69291097651019823E+02, + B 2.73673124285693704E+02, 2.78067573440366143E+02, + C 2.82474292687630396E+02, 2.86893133295426994E+02, + D 2.91323950094270308E+02, 2.95766601350760624E+02, + E 3.00220948647014132E+02, 3.04686856765668715E+02/ + DATA GLN(89), GLN(90), GLN(91), GLN(92), GLN(93), GLN(94), + 1 GLN(95), GLN(96), GLN(97), GLN(98), GLN(99), GLN(100)/ + 2 3.09164193580146922E+02, 3.13652829949879062E+02, + 3 3.18152639620209327E+02, 3.22663499126726177E+02, + 4 3.27185287703775217E+02, 3.31717887196928473E+02, + 5 3.36261181979198477E+02, 3.40815058870799018E+02, + 6 3.45379407062266854E+02, 3.49954118040770237E+02, + 7 3.54539085519440809E+02, 3.59134205369575399E+02/ +C COEFFICIENTS OF ASYMPTOTIC EXPANSION + 
DATA CF(1), CF(2), CF(3), CF(4), CF(5), CF(6), CF(7), CF(8), + 1 CF(9), CF(10), CF(11), CF(12), CF(13), CF(14), CF(15), + 2 CF(16), CF(17), CF(18), CF(19), CF(20), CF(21), CF(22)/ + 3 8.33333333333333333E-02, -2.77777777777777778E-03, + 4 7.93650793650793651E-04, -5.95238095238095238E-04, + 5 8.41750841750841751E-04, -1.91752691752691753E-03, + 6 6.41025641025641026E-03, -2.95506535947712418E-02, + 7 1.79644372368830573E-01, -1.39243221690590112E+00, + 8 1.34028640441683920E+01, -1.56848284626002017E+02, + 9 2.19310333333333333E+03, -3.61087712537249894E+04, + A 6.91472268851313067E+05, -1.52382215394074162E+07, + B 3.82900751391414141E+08, -1.08822660357843911E+10, + C 3.47320283765002252E+11, -1.23696021422692745E+13, + D 4.88788064793079335E+14, -2.13203339609193739E+16/ +C +C LN(2*PI) + DATA CON / 1.83787706640934548E+00/ +C +C***FIRST EXECUTABLE STATEMENT GAMLN + IERR=0 + IF (Z.LE.0.0E0) GO TO 70 + IF (Z.GT.101.0E0) GO TO 10 + NZ = INT(Z) + FZ = Z - FLOAT(NZ) + IF (FZ.GT.0.0E0) GO TO 10 + IF (NZ.GT.100) GO TO 10 + GAMLN = GLN(NZ) + RETURN + 10 CONTINUE + WDTOL = R1MACH(4) + WDTOL = AMAX1(WDTOL,0.5E-18) + I1M = I1MACH(11) + RLN = R1MACH(5)*FLOAT(I1M) + FLN = AMIN1(RLN,20.0E0) + FLN = AMAX1(FLN,3.0E0) + FLN = FLN - 3.0E0 + ZM = 1.8000E0 + 0.3875E0*FLN + MZ = INT(ZM) + 1 + ZMIN = FLOAT(MZ) + ZDMY = Z + ZINC = 0.0E0 + IF (Z.GE.ZMIN) GO TO 20 + ZINC = ZMIN - FLOAT(NZ) + ZDMY = Z + ZINC + 20 CONTINUE + ZP = 1.0E0/ZDMY + T1 = CF(1)*ZP + S = T1 + IF (ZP.LT.WDTOL) GO TO 40 + ZSQ = ZP*ZP + TST = T1*WDTOL + DO 30 K=2,22 + ZP = ZP*ZSQ + TRM = CF(K)*ZP + IF (ABS(TRM).LT.TST) GO TO 40 + S = S + TRM + 30 CONTINUE + 40 CONTINUE + IF (ZINC.NE.0.0E0) GO TO 50 + TLG = ALOG(Z) + GAMLN = Z*(TLG-1.0E0) + 0.5E0*(CON-TLG) + S + RETURN + 50 CONTINUE + ZP = 1.0E0 + NZ = INT(ZINC) + DO 60 I=1,NZ + ZP = ZP*(Z+FLOAT(I-1)) + 60 CONTINUE + TLG = ALOG(ZDMY) + GAMLN = ZDMY*(TLG-1.0E0) - ALOG(ZP) + 0.5E0*(CON-TLG) + S + RETURN +C +C + 70 CONTINUE + IERR=1 + RETURN + END diff -r 45f5faba05a2 -r 
82be108cc558 libcruft/blas-xtra/Makefile.in --- a/libcruft/blas-xtra/Makefile.in Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/blas-xtra/Makefile.in Sun Apr 27 22:34:17 2008 +0200 @@ -26,7 +26,9 @@ EXTERNAL_DISTFILES = $(DISTFILES) -FSRC = xddot.f xdnrm2.f xdznrm2.f xerbla.f xzdotc.f xzdotu.f +FSRC = xddot.f xdnrm2.f xdznrm2.f xzdotc.f xzdotu.f \ + xsdot.f xsnrm2.f xscnrm2.f xcdotc.f xcdotu.f \ + xerbla.f include $(TOPDIR)/Makeconf diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas-xtra/xcdotc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas-xtra/xcdotc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,7 @@ + subroutine xcdotc (n, zx, incx, zy, incy, retval) + complex cdotc, zx(*), zy(*), retval + integer n, incx, incy + external cdotc + retval = cdotc (n, zx, incx, zy, incy) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas-xtra/xcdotu.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas-xtra/xcdotu.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,7 @@ + subroutine xcdotu (n, zx, incx, zy, incy, retval) + complex cdotu, zx(*), zy(*), retval + integer n, incx, incy + external cdotu + retval = cdotu (n, zx, incx, zy, incy) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas-xtra/xscnrm2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas-xtra/xscnrm2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,7 @@ + subroutine xscnrm2 (n, x, incx, retval) + real scnrm2, retval + complex x(*) + integer n, incx + retval = scnrm2 (n, x, incx) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas-xtra/xsdot.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas-xtra/xsdot.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,6 @@ + subroutine xsdot (n, dx, incx, dy, incy, retval) + real sdot, dx(*), dy(*), retval + integer n, incx, incy + retval = sdot (n, dx, incx, dy, incy) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas-xtra/xsnrm2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/libcruft/blas-xtra/xsnrm2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,6 @@ + subroutine xsnrm2 (n, x, incx, retval) + real snrm2, x(*), retval + integer n, incx + retval = snrm2 (n, x, incx) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/Makefile.in --- a/libcruft/blas/Makefile.in Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/blas/Makefile.in Sun Apr 27 22:34:17 2008 +0200 @@ -33,7 +33,13 @@ sgemv.f sscal.f ssyrk.f strsm.f zaxpy.f zcopy.f zdotc.f zdotu.f \ zdrot.f zdscal.f zgemm.f zgemv.f zgerc.f zgeru.f zhemv.f zher.f \ zher2.f zher2k.f zherk.f zscal.f zswap.f ztbsv.f ztrmm.f ztrmv.f \ - ztrsm.f ztrsv.f + ztrsm.f ztrsv.f sasum.f saxpy.f scabs1.f scopy.f \ + sger.f smach.f snrm2.f srot.f sswap.f ssymv.f ssyr.f \ + ssyr2.f ssyr2k.f stbsv.f strmm.f strmv.f strsv.f \ + scasum.f scnrm2.f caxpy.f ccopy.f cdotc.f cdotu.f \ + csrot.f csscal.f cgemm.f cgemv.f cgerc.f cgeru.f chemv.f cher.f \ + cher2.f cher2k.f cherk.f cscal.f cswap.f ctbsv.f ctrmm.f ctrmv.f \ + ctrsm.f ctrsv.f include $(TOPDIR)/Makeconf diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/caxpy.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/caxpy.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,52 @@ + SUBROUTINE CAXPY(N,CA,CX,INCX,CY,INCY) +* .. Scalar Arguments .. + COMPLEX CA + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + COMPLEX CX(*),CY(*) +* .. +* +* Purpose +* ======= +* +* CAXPY constant times a vector plus a vector. +* +* Further Details +* =============== +* +* jack dongarra, linpack, 3/11/78. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* .. Local Scalars .. + INTEGER I,IX,IY +* .. +* .. External Functions .. + REAL SCABS1 + EXTERNAL SCABS1 +* .. + IF (N.LE.0) RETURN + IF (SCABS1(CA).EQ.0.0E+0) RETURN + IF (INCX.EQ.1 .AND. 
INCY.EQ.1) GO TO 20 +* +* code for unequal increments or equal increments +* not equal to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO 10 I = 1,N + CY(IY) = CY(IY) + CA*CX(IX) + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* code for both increments equal to 1 +* + 20 DO 30 I = 1,N + CY(I) = CY(I) + CA*CX(I) + 30 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/ccopy.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/ccopy.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,46 @@ + SUBROUTINE CCOPY(N,CX,INCX,CY,INCY) +* .. Scalar Arguments .. + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + COMPLEX CX(*),CY(*) +* .. +* +* Purpose +* ======= +* +* CCOPY copies a vector x to a vector y. +* +* Further Details +* =============== +* +* jack dongarra, linpack, 3/11/78. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* .. Local Scalars .. + INTEGER I,IX,IY +* .. + IF (N.LE.0) RETURN + IF (INCX.EQ.1 .AND. INCY.EQ.1) GO TO 20 +* +* code for unequal increments or equal increments +* not equal to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO 10 I = 1,N + CY(IY) = CX(IX) + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* code for both increments equal to 1 +* + 20 DO 30 I = 1,N + CY(I) = CX(I) + 30 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/cdotc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/cdotc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,55 @@ + COMPLEX FUNCTION CDOTC(N,CX,INCX,CY,INCY) +* .. Scalar Arguments .. + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + COMPLEX CX(*),CY(*) +* .. +* +* Purpose +* ======= +* +* forms the dot product of two vectors, conjugating the first +* vector. +* +* Further Details +* =============== +* +* jack dongarra, linpack, 3/11/78. 
+* modified 12/3/93, array(1) declarations changed to array(*) +* +* .. Local Scalars .. + COMPLEX CTEMP + INTEGER I,IX,IY +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. + CTEMP = (0.0,0.0) + CDOTC = (0.0,0.0) + IF (N.LE.0) RETURN + IF (INCX.EQ.1 .AND. INCY.EQ.1) GO TO 20 +* +* code for unequal increments or equal increments +* not equal to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO 10 I = 1,N + CTEMP = CTEMP + CONJG(CX(IX))*CY(IY) + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + CDOTC = CTEMP + RETURN +* +* code for both increments equal to 1 +* + 20 DO 30 I = 1,N + CTEMP = CTEMP + CONJG(CX(I))*CY(I) + 30 CONTINUE + CDOTC = CTEMP + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/cdotu.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/cdotu.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,51 @@ + COMPLEX FUNCTION CDOTU(N,CX,INCX,CY,INCY) +* .. Scalar Arguments .. + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + COMPLEX CX(*),CY(*) +* .. +* +* Purpose +* ======= +* +* CDOTU forms the dot product of two vectors. +* +* Further Details +* =============== +* +* jack dongarra, linpack, 3/11/78. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* .. Local Scalars .. + COMPLEX CTEMP + INTEGER I,IX,IY +* .. + CTEMP = (0.0,0.0) + CDOTU = (0.0,0.0) + IF (N.LE.0) RETURN + IF (INCX.EQ.1 .AND. 
INCY.EQ.1) GO TO 20 +* +* code for unequal increments or equal increments +* not equal to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO 10 I = 1,N + CTEMP = CTEMP + CX(IX)*CY(IY) + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + CDOTU = CTEMP + RETURN +* +* code for both increments equal to 1 +* + 20 DO 30 I = 1,N + CTEMP = CTEMP + CX(I)*CY(I) + 30 CONTINUE + CDOTU = CTEMP + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/cgemm.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/cgemm.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,414 @@ + SUBROUTINE CGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) +* .. Scalar Arguments .. + COMPLEX ALPHA,BETA + INTEGER K,LDA,LDB,LDC,M,N + CHARACTER TRANSA,TRANSB +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* Purpose +* ======= +* +* CGEMM performs one of the matrix-matrix operations +* +* C := alpha*op( A )*op( B ) + beta*C, +* +* where op( X ) is one of +* +* op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), +* +* alpha and beta are scalars, and A, B and C are matrices, with op( A ) +* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. +* +* Arguments +* ========== +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n', op( A ) = A. +* +* TRANSA = 'T' or 't', op( A ) = A'. +* +* TRANSA = 'C' or 'c', op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* TRANSB - CHARACTER*1. +* On entry, TRANSB specifies the form of op( B ) to be used in +* the matrix multiplication as follows: +* +* TRANSB = 'N' or 'n', op( B ) = B. +* +* TRANSB = 'T' or 't', op( B ) = B'. +* +* TRANSB = 'C' or 'c', op( B ) = conjg( B' ). +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix +* op( A ) and of the matrix C. M must be at least zero. +* Unchanged on exit. 
+* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix +* op( B ) and the number of columns of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of columns of the matrix +* op( A ) and the number of rows of the matrix op( B ). K must +* be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* k when TRANSA = 'N' or 'n', and is m otherwise. +* Before entry with TRANSA = 'N' or 'n', the leading m by k +* part of the array A must contain the matrix A, otherwise +* the leading k by m part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANSA = 'N' or 'n' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, k ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is +* n when TRANSB = 'N' or 'n', and is k otherwise. +* Before entry with TRANSB = 'N' or 'n', the leading k by n +* part of the array B must contain the matrix B, otherwise +* the leading n by k part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANSB = 'N' or 'n' then +* LDB must be at least max( 1, k ), otherwise LDB must be at +* least max( 1, n ). +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. 
+* On exit, the array C is overwritten by the m by n matrix +* ( alpha*op( A )*op( B ) + beta*C ). +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG,MAX +* .. +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB + LOGICAL CONJA,CONJB,NOTA,NOTB +* .. +* .. Parameters .. + COMPLEX ONE + PARAMETER (ONE= (1.0E+0,0.0E+0)) + COMPLEX ZERO + PARAMETER (ZERO= (0.0E+0,0.0E+0)) +* .. +* +* Set NOTA and NOTB as true if A and B respectively are not +* conjugated or transposed, set CONJA and CONJB as true if A and +* B respectively are to be transposed but not conjugated and set +* NROWA, NCOLA and NROWB as the number of rows and columns of A +* and the number of rows of B respectively. +* + NOTA = LSAME(TRANSA,'N') + NOTB = LSAME(TRANSB,'N') + CONJA = LSAME(TRANSA,'C') + CONJB = LSAME(TRANSB,'C') + IF (NOTA) THEN + NROWA = M + NCOLA = K + ELSE + NROWA = K + NCOLA = M + END IF + IF (NOTB) THEN + NROWB = K + ELSE + NROWB = N + END IF +* +* Test the input parameters. +* + INFO = 0 + IF ((.NOT.NOTA) .AND. (.NOT.CONJA) .AND. + + (.NOT.LSAME(TRANSA,'T'))) THEN + INFO = 1 + ELSE IF ((.NOT.NOTB) .AND. (.NOT.CONJB) .AND. 
+ + (.NOT.LSAME(TRANSB,'T'))) THEN + INFO = 2 + ELSE IF (M.LT.0) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (K.LT.0) THEN + INFO = 5 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 8 + ELSE IF (LDB.LT.MAX(1,NROWB)) THEN + INFO = 10 + ELSE IF (LDC.LT.MAX(1,M)) THEN + INFO = 13 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CGEMM ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + IF (BETA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,M + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + DO 30 I = 1,M + C(I,J) = BETA*C(I,J) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF (NOTB) THEN + IF (NOTA) THEN +* +* Form C := alpha*A*B + beta*C. +* + DO 90 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 50 I = 1,M + C(I,J) = ZERO + 50 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 60 I = 1,M + C(I,J) = BETA*C(I,J) + 60 CONTINUE + END IF + DO 80 L = 1,K + IF (B(L,J).NE.ZERO) THEN + TEMP = ALPHA*B(L,J) + DO 70 I = 1,M + C(I,J) = C(I,J) + TEMP*A(I,L) + 70 CONTINUE + END IF + 80 CONTINUE + 90 CONTINUE + ELSE IF (CONJA) THEN +* +* Form C := alpha*conjg( A' )*B + beta*C. +* + DO 120 J = 1,N + DO 110 I = 1,M + TEMP = ZERO + DO 100 L = 1,K + TEMP = TEMP + CONJG(A(L,I))*B(L,J) + 100 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 110 CONTINUE + 120 CONTINUE + ELSE +* +* Form C := alpha*A'*B + beta*C +* + DO 150 J = 1,N + DO 140 I = 1,M + TEMP = ZERO + DO 130 L = 1,K + TEMP = TEMP + A(L,I)*B(L,J) + 130 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 140 CONTINUE + 150 CONTINUE + END IF + ELSE IF (NOTA) THEN + IF (CONJB) THEN +* +* Form C := alpha*A*conjg( B' ) + beta*C. 
+* + DO 200 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 160 I = 1,M + C(I,J) = ZERO + 160 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 170 I = 1,M + C(I,J) = BETA*C(I,J) + 170 CONTINUE + END IF + DO 190 L = 1,K + IF (B(J,L).NE.ZERO) THEN + TEMP = ALPHA*CONJG(B(J,L)) + DO 180 I = 1,M + C(I,J) = C(I,J) + TEMP*A(I,L) + 180 CONTINUE + END IF + 190 CONTINUE + 200 CONTINUE + ELSE +* +* Form C := alpha*A*B' + beta*C +* + DO 250 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 210 I = 1,M + C(I,J) = ZERO + 210 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 220 I = 1,M + C(I,J) = BETA*C(I,J) + 220 CONTINUE + END IF + DO 240 L = 1,K + IF (B(J,L).NE.ZERO) THEN + TEMP = ALPHA*B(J,L) + DO 230 I = 1,M + C(I,J) = C(I,J) + TEMP*A(I,L) + 230 CONTINUE + END IF + 240 CONTINUE + 250 CONTINUE + END IF + ELSE IF (CONJA) THEN + IF (CONJB) THEN +* +* Form C := alpha*conjg( A' )*conjg( B' ) + beta*C. +* + DO 280 J = 1,N + DO 270 I = 1,M + TEMP = ZERO + DO 260 L = 1,K + TEMP = TEMP + CONJG(A(L,I))*CONJG(B(J,L)) + 260 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 270 CONTINUE + 280 CONTINUE + ELSE +* +* Form C := alpha*conjg( A' )*B' + beta*C +* + DO 310 J = 1,N + DO 300 I = 1,M + TEMP = ZERO + DO 290 L = 1,K + TEMP = TEMP + CONJG(A(L,I))*B(J,L) + 290 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 300 CONTINUE + 310 CONTINUE + END IF + ELSE + IF (CONJB) THEN +* +* Form C := alpha*A'*conjg( B' ) + beta*C +* + DO 340 J = 1,N + DO 330 I = 1,M + TEMP = ZERO + DO 320 L = 1,K + TEMP = TEMP + A(L,I)*CONJG(B(J,L)) + 320 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 330 CONTINUE + 340 CONTINUE + ELSE +* +* Form C := alpha*A'*B' + beta*C +* + DO 370 J = 1,N + DO 360 I = 1,M + TEMP = ZERO + DO 350 L = 1,K + TEMP = TEMP + A(L,I)*B(J,L) + 350 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) 
+ END IF + 360 CONTINUE + 370 CONTINUE + END IF + END IF +* + RETURN +* +* End of CGEMM . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/cgemv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/cgemv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,281 @@ + SUBROUTINE CGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) +* .. Scalar Arguments .. + COMPLEX ALPHA,BETA + INTEGER INCX,INCY,LDA,M,N + CHARACTER TRANS +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),X(*),Y(*) +* .. +* +* Purpose +* ======= +* +* CGEMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or +* +* y := alpha*conjg( A' )*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n matrix. +* +* Arguments +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* X - COMPLEX array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. 
+* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry with BETA non-zero, the incremented array Y +* must contain the vector y. On exit, Y is overwritten by the +* updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ONE + PARAMETER (ONE= (1.0E+0,0.0E+0)) + COMPLEX ZERO + PARAMETER (ZERO= (0.0E+0,0.0E+0)) +* .. +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY + LOGICAL NOCONJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG,MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. + + .NOT.LSAME(TRANS,'C')) THEN + INFO = 1 + ELSE IF (M.LT.0) THEN + INFO = 2 + ELSE IF (N.LT.0) THEN + INFO = 3 + ELSE IF (LDA.LT.MAX(1,M)) THEN + INFO = 6 + ELSE IF (INCX.EQ.0) THEN + INFO = 8 + ELSE IF (INCY.EQ.0) THEN + INFO = 11 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CGEMV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + + ((ALPHA.EQ.ZERO).AND. 
(BETA.EQ.ONE))) RETURN +* + NOCONJ = LSAME(TRANS,'T') +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF (LSAME(TRANS,'N')) THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (LENX-1)*INCX + END IF + IF (INCY.GT.0) THEN + KY = 1 + ELSE + KY = 1 - (LENY-1)*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF (BETA.NE.ONE) THEN + IF (INCY.EQ.1) THEN + IF (BETA.EQ.ZERO) THEN + DO 10 I = 1,LENY + Y(I) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1,LENY + Y(I) = BETA*Y(I) + 20 CONTINUE + END IF + ELSE + IY = KY + IF (BETA.EQ.ZERO) THEN + DO 30 I = 1,LENY + Y(IY) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1,LENY + Y(IY) = BETA*Y(IY) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF (ALPHA.EQ.ZERO) RETURN + IF (LSAME(TRANS,'N')) THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF (INCY.EQ.1) THEN + DO 60 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = ALPHA*X(JX) + DO 50 I = 1,M + Y(I) = Y(I) + TEMP*A(I,J) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = ALPHA*X(JX) + IY = KY + DO 70 I = 1,M + Y(IY) = Y(IY) + TEMP*A(I,J) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. 
+* + JY = KY + IF (INCX.EQ.1) THEN + DO 110 J = 1,N + TEMP = ZERO + IF (NOCONJ) THEN + DO 90 I = 1,M + TEMP = TEMP + A(I,J)*X(I) + 90 CONTINUE + ELSE + DO 100 I = 1,M + TEMP = TEMP + CONJG(A(I,J))*X(I) + 100 CONTINUE + END IF + Y(JY) = Y(JY) + ALPHA*TEMP + JY = JY + INCY + 110 CONTINUE + ELSE + DO 140 J = 1,N + TEMP = ZERO + IX = KX + IF (NOCONJ) THEN + DO 120 I = 1,M + TEMP = TEMP + A(I,J)*X(IX) + IX = IX + INCX + 120 CONTINUE + ELSE + DO 130 I = 1,M + TEMP = TEMP + CONJG(A(I,J))*X(IX) + IX = IX + INCX + 130 CONTINUE + END IF + Y(JY) = Y(JY) + ALPHA*TEMP + JY = JY + INCY + 140 CONTINUE + END IF + END IF +* + RETURN +* +* End of CGEMV . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/cgerc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/cgerc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,159 @@ + SUBROUTINE CGERC(M,N,ALPHA,X,INCX,Y,INCY,A,LDA) +* .. Scalar Arguments .. + COMPLEX ALPHA + INTEGER INCX,INCY,LDA,M,N +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),X(*),Y(*) +* .. +* +* Purpose +* ======= +* +* CGERC performs the rank 1 operation +* +* A := alpha*x*conjg( y' ) + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Arguments +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. 
+* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER (ZERO= (0.0E+0,0.0E+0)) +* .. +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I,INFO,IX,J,JY,KX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG,MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (M.LT.0) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (INCX.EQ.0) THEN + INFO = 5 + ELSE IF (INCY.EQ.0) THEN + INFO = 7 + ELSE IF (LDA.LT.MAX(1,M)) THEN + INFO = 9 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CGERC ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. 
+* + IF (INCY.GT.0) THEN + JY = 1 + ELSE + JY = 1 - (N-1)*INCY + END IF + IF (INCX.EQ.1) THEN + DO 20 J = 1,N + IF (Y(JY).NE.ZERO) THEN + TEMP = ALPHA*CONJG(Y(JY)) + DO 10 I = 1,M + A(I,J) = A(I,J) + X(I)*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (M-1)*INCX + END IF + DO 40 J = 1,N + IF (Y(JY).NE.ZERO) THEN + TEMP = ALPHA*CONJG(Y(JY)) + IX = KX + DO 30 I = 1,M + A(I,J) = A(I,J) + X(IX)*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of CGERC . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/cgeru.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/cgeru.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,159 @@ + SUBROUTINE CGERU(M,N,ALPHA,X,INCX,Y,INCY,A,LDA) +* .. Scalar Arguments .. + COMPLEX ALPHA + INTEGER INCX,INCY,LDA,M,N +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),X(*),Y(*) +* .. +* +* Purpose +* ======= +* +* CGERU performs the rank 1 operation +* +* A := alpha*x*y' + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Arguments +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). 
+* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER (ZERO= (0.0E+0,0.0E+0)) +* .. +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I,INFO,IX,J,JY,KX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (M.LT.0) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (INCX.EQ.0) THEN + INFO = 5 + ELSE IF (INCY.EQ.0) THEN + INFO = 7 + ELSE IF (LDA.LT.MAX(1,M)) THEN + INFO = 9 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CGERU ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. 
+* + IF (INCY.GT.0) THEN + JY = 1 + ELSE + JY = 1 - (N-1)*INCY + END IF + IF (INCX.EQ.1) THEN + DO 20 J = 1,N + IF (Y(JY).NE.ZERO) THEN + TEMP = ALPHA*Y(JY) + DO 10 I = 1,M + A(I,J) = A(I,J) + X(I)*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (M-1)*INCX + END IF + DO 40 J = 1,N + IF (Y(JY).NE.ZERO) THEN + TEMP = ALPHA*Y(JY) + IX = KX + DO 30 I = 1,M + A(I,J) = A(I,J) + X(IX)*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of CGERU . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/chemv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/chemv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,266 @@ + SUBROUTINE CHEMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) +* .. Scalar Arguments .. + COMPLEX ALPHA,BETA + INTEGER INCX,INCY,LDA,N + CHARACTER UPLO +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),X(*),Y(*) +* .. +* +* Purpose +* ======= +* +* CHEMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n hermitian matrix. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). 
+* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ONE + PARAMETER (ONE= (1.0E+0,0.0E+0)) + COMPLEX ZERO + PARAMETER (ZERO= (0.0E+0,0.0E+0)) +* .. +* .. Local Scalars .. 
+ COMPLEX TEMP1,TEMP2 + INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG,MAX,REAL +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 5 + ELSE IF (INCX.EQ.0) THEN + INFO = 7 + ELSE IF (INCY.EQ.0) THEN + INFO = 10 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CHEMV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN +* +* Set up the start points in X and Y. +* + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (N-1)*INCX + END IF + IF (INCY.GT.0) THEN + KY = 1 + ELSE + KY = 1 - (N-1)*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF (BETA.NE.ONE) THEN + IF (INCY.EQ.1) THEN + IF (BETA.EQ.ZERO) THEN + DO 10 I = 1,N + Y(I) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1,N + Y(I) = BETA*Y(I) + 20 CONTINUE + END IF + ELSE + IY = KY + IF (BETA.EQ.ZERO) THEN + DO 30 I = 1,N + Y(IY) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1,N + Y(IY) = BETA*Y(IY) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF (ALPHA.EQ.ZERO) RETURN + IF (LSAME(UPLO,'U')) THEN +* +* Form y when A is stored in upper triangle. +* + IF ((INCX.EQ.1) .AND. 
(INCY.EQ.1)) THEN + DO 60 J = 1,N + TEMP1 = ALPHA*X(J) + TEMP2 = ZERO + DO 50 I = 1,J - 1 + Y(I) = Y(I) + TEMP1*A(I,J) + TEMP2 = TEMP2 + CONJG(A(I,J))*X(I) + 50 CONTINUE + Y(J) = Y(J) + TEMP1*REAL(A(J,J)) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80 J = 1,N + TEMP1 = ALPHA*X(JX) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70 I = 1,J - 1 + Y(IY) = Y(IY) + TEMP1*A(I,J) + TEMP2 = TEMP2 + CONJG(A(I,J))*X(IX) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y(JY) = Y(JY) + TEMP1*REAL(A(J,J)) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN + DO 100 J = 1,N + TEMP1 = ALPHA*X(J) + TEMP2 = ZERO + Y(J) = Y(J) + TEMP1*REAL(A(J,J)) + DO 90 I = J + 1,N + Y(I) = Y(I) + TEMP1*A(I,J) + TEMP2 = TEMP2 + CONJG(A(I,J))*X(I) + 90 CONTINUE + Y(J) = Y(J) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120 J = 1,N + TEMP1 = ALPHA*X(JX) + TEMP2 = ZERO + Y(JY) = Y(JY) + TEMP1*REAL(A(J,J)) + IX = JX + IY = JY + DO 110 I = J + 1,N + IX = IX + INCX + IY = IY + INCY + Y(IY) = Y(IY) + TEMP1*A(I,J) + TEMP2 = TEMP2 + CONJG(A(I,J))*X(IX) + 110 CONTINUE + Y(JY) = Y(JY) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHEMV . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/cher.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/cher.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,214 @@ + SUBROUTINE CHER(UPLO,N,ALPHA,X,INCX,A,LDA) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX,LDA,N + CHARACTER UPLO +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),X(*) +* .. +* +* Purpose +* ======= +* +* CHER performs the hermitian rank 1 operation +* +* A := alpha*x*conjg( x' ) + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n hermitian matrix. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. 
+* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. 
+* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER (ZERO= (0.0E+0,0.0E+0)) +* .. +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I,INFO,IX,J,JX,KX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG,MAX,REAL +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (INCX.EQ.0) THEN + INFO = 5 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 7 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CHER ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. (ALPHA.EQ.REAL(ZERO))) RETURN +* +* Set the start point in X if the increment is not unity. +* + IF (INCX.LE.0) THEN + KX = 1 - (N-1)*INCX + ELSE IF (INCX.NE.1) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF (LSAME(UPLO,'U')) THEN +* +* Form A when A is stored in upper triangle. +* + IF (INCX.EQ.1) THEN + DO 20 J = 1,N + IF (X(J).NE.ZERO) THEN + TEMP = ALPHA*CONJG(X(J)) + DO 10 I = 1,J - 1 + A(I,J) = A(I,J) + X(I)*TEMP + 10 CONTINUE + A(J,J) = REAL(A(J,J)) + REAL(X(J)*TEMP) + ELSE + A(J,J) = REAL(A(J,J)) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = ALPHA*CONJG(X(JX)) + IX = KX + DO 30 I = 1,J - 1 + A(I,J) = A(I,J) + X(IX)*TEMP + IX = IX + INCX + 30 CONTINUE + A(J,J) = REAL(A(J,J)) + REAL(X(JX)*TEMP) + ELSE + A(J,J) = REAL(A(J,J)) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in lower triangle. 
+* + IF (INCX.EQ.1) THEN + DO 60 J = 1,N + IF (X(J).NE.ZERO) THEN + TEMP = ALPHA*CONJG(X(J)) + A(J,J) = REAL(A(J,J)) + REAL(TEMP*X(J)) + DO 50 I = J + 1,N + A(I,J) = A(I,J) + X(I)*TEMP + 50 CONTINUE + ELSE + A(J,J) = REAL(A(J,J)) + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = ALPHA*CONJG(X(JX)) + A(J,J) = REAL(A(J,J)) + REAL(TEMP*X(JX)) + IX = JX + DO 70 I = J + 1,N + IX = IX + INCX + A(I,J) = A(I,J) + X(IX)*TEMP + 70 CONTINUE + ELSE + A(J,J) = REAL(A(J,J)) + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHER . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/cher2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/cher2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,249 @@ + SUBROUTINE CHER2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA) +* .. Scalar Arguments .. + COMPLEX ALPHA + INTEGER INCX,INCY,LDA,N + CHARACTER UPLO +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),X(*),Y(*) +* .. +* +* Purpose +* ======= +* +* CHER2 performs the hermitian rank 2 operation +* +* A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an n +* by n hermitian matrix. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. 
+* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER (ZERO= (0.0E+0,0.0E+0)) +* .. +* .. Local Scalars .. + COMPLEX TEMP1,TEMP2 + INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY +* .. +* .. External Functions .. 
+ LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG,MAX,REAL +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (INCX.EQ.0) THEN + INFO = 5 + ELSE IF (INCY.EQ.0) THEN + INFO = 7 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 9 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CHER2 ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (N-1)*INCX + END IF + IF (INCY.GT.0) THEN + KY = 1 + ELSE + KY = 1 - (N-1)*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF (LSAME(UPLO,'U')) THEN +* +* Form A when A is stored in the upper triangle. +* + IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN + DO 20 J = 1,N + IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN + TEMP1 = ALPHA*CONJG(Y(J)) + TEMP2 = CONJG(ALPHA*X(J)) + DO 10 I = 1,J - 1 + A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2 + 10 CONTINUE + A(J,J) = REAL(A(J,J)) + + + REAL(X(J)*TEMP1+Y(J)*TEMP2) + ELSE + A(J,J) = REAL(A(J,J)) + END IF + 20 CONTINUE + ELSE + DO 40 J = 1,N + IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN + TEMP1 = ALPHA*CONJG(Y(JY)) + TEMP2 = CONJG(ALPHA*X(JX)) + IX = KX + IY = KY + DO 30 I = 1,J - 1 + A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + A(J,J) = REAL(A(J,J)) + + + REAL(X(JX)*TEMP1+Y(JY)*TEMP2) + ELSE + A(J,J) = REAL(A(J,J)) + END IF + JX = JX + INCX + JY = JY + INCY + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in the lower triangle. +* + IF ((INCX.EQ.1) .AND. 
(INCY.EQ.1)) THEN + DO 60 J = 1,N + IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN + TEMP1 = ALPHA*CONJG(Y(J)) + TEMP2 = CONJG(ALPHA*X(J)) + A(J,J) = REAL(A(J,J)) + + + REAL(X(J)*TEMP1+Y(J)*TEMP2) + DO 50 I = J + 1,N + A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2 + 50 CONTINUE + ELSE + A(J,J) = REAL(A(J,J)) + END IF + 60 CONTINUE + ELSE + DO 80 J = 1,N + IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN + TEMP1 = ALPHA*CONJG(Y(JY)) + TEMP2 = CONJG(ALPHA*X(JX)) + A(J,J) = REAL(A(J,J)) + + + REAL(X(JX)*TEMP1+Y(JY)*TEMP2) + IX = JX + IY = JY + DO 70 I = J + 1,N + IX = IX + INCX + IY = IY + INCY + A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2 + 70 CONTINUE + ELSE + A(J,J) = REAL(A(J,J)) + END IF + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHER2 . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/cher2k.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/cher2k.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,368 @@ + SUBROUTINE CHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) +* .. Scalar Arguments .. + COMPLEX ALPHA + REAL BETA + INTEGER K,LDA,LDB,LDC,N + CHARACTER TRANS,UPLO +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* Purpose +* ======= +* +* CHER2K performs one of the hermitian rank 2k operations +* +* C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, +* +* or +* +* C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, +* +* where alpha and beta are scalars with beta real, C is an n by n +* hermitian matrix and A and B are n by k matrices in the first case +* and k by n matrices in the second case. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. 
+* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*conjg( B' ) + +* conjg( alpha )*B*conjg( A' ) + +* beta*C. +* +* TRANS = 'C' or 'c' C := alpha*conjg( A' )*B + +* conjg( alpha )*conjg( B' )*A + +* beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrices A and B, and on entry with +* TRANS = 'C' or 'c', K specifies the number of rows of the +* matrices A and B. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array B must contain the matrix B, otherwise +* the leading k by n part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDB must be at least max( 1, n ), otherwise LDB must +* be at least max( 1, k ). +* Unchanged on exit. 
+* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. +* Ed Anderson, Cray Research Inc. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG,MAX,REAL +* .. +* .. Local Scalars .. + COMPLEX TEMP1,TEMP2 + INTEGER I,INFO,J,L,NROWA + LOGICAL UPPER +* .. +* .. Parameters .. + REAL ONE + PARAMETER (ONE=1.0E+0) + COMPLEX ZERO + PARAMETER (ZERO= (0.0E+0,0.0E+0)) +* .. +* +* Test the input parameters. 
+* + IF (LSAME(TRANS,'N')) THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME(UPLO,'U') +* + INFO = 0 + IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN + INFO = 1 + ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND. + + (.NOT.LSAME(TRANS,'C'))) THEN + INFO = 2 + ELSE IF (N.LT.0) THEN + INFO = 3 + ELSE IF (K.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 7 + ELSE IF (LDB.LT.MAX(1,NROWA)) THEN + INFO = 9 + ELSE IF (LDC.LT.MAX(1,N)) THEN + INFO = 12 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CHER2K',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR. + + (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + IF (UPPER) THEN + IF (BETA.EQ.REAL(ZERO)) THEN + DO 20 J = 1,N + DO 10 I = 1,J + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + DO 30 I = 1,J - 1 + C(I,J) = BETA*C(I,J) + 30 CONTINUE + C(J,J) = BETA*REAL(C(J,J)) + 40 CONTINUE + END IF + ELSE + IF (BETA.EQ.REAL(ZERO)) THEN + DO 60 J = 1,N + DO 50 I = J,N + C(I,J) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1,N + C(J,J) = BETA*REAL(C(J,J)) + DO 70 I = J + 1,N + C(I,J) = BETA*C(I,J) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF (LSAME(TRANS,'N')) THEN +* +* Form C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + +* C. +* + IF (UPPER) THEN + DO 130 J = 1,N + IF (BETA.EQ.REAL(ZERO)) THEN + DO 90 I = 1,J + C(I,J) = ZERO + 90 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 100 I = 1,J - 1 + C(I,J) = BETA*C(I,J) + 100 CONTINUE + C(J,J) = BETA*REAL(C(J,J)) + ELSE + C(J,J) = REAL(C(J,J)) + END IF + DO 120 L = 1,K + IF ((A(J,L).NE.ZERO) .OR. 
(B(J,L).NE.ZERO)) THEN + TEMP1 = ALPHA*CONJG(B(J,L)) + TEMP2 = CONJG(ALPHA*A(J,L)) + DO 110 I = 1,J - 1 + C(I,J) = C(I,J) + A(I,L)*TEMP1 + + + B(I,L)*TEMP2 + 110 CONTINUE + C(J,J) = REAL(C(J,J)) + + + REAL(A(J,L)*TEMP1+B(J,L)*TEMP2) + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180 J = 1,N + IF (BETA.EQ.REAL(ZERO)) THEN + DO 140 I = J,N + C(I,J) = ZERO + 140 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 150 I = J + 1,N + C(I,J) = BETA*C(I,J) + 150 CONTINUE + C(J,J) = BETA*REAL(C(J,J)) + ELSE + C(J,J) = REAL(C(J,J)) + END IF + DO 170 L = 1,K + IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN + TEMP1 = ALPHA*CONJG(B(J,L)) + TEMP2 = CONJG(ALPHA*A(J,L)) + DO 160 I = J + 1,N + C(I,J) = C(I,J) + A(I,L)*TEMP1 + + + B(I,L)*TEMP2 + 160 CONTINUE + C(J,J) = REAL(C(J,J)) + + + REAL(A(J,L)*TEMP1+B(J,L)*TEMP2) + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + +* C. +* + IF (UPPER) THEN + DO 210 J = 1,N + DO 200 I = 1,J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190 L = 1,K + TEMP1 = TEMP1 + CONJG(A(L,I))*B(L,J) + TEMP2 = TEMP2 + CONJG(B(L,I))*A(L,J) + 190 CONTINUE + IF (I.EQ.J) THEN + IF (BETA.EQ.REAL(ZERO)) THEN + C(J,J) = REAL(ALPHA*TEMP1+ + + CONJG(ALPHA)*TEMP2) + ELSE + C(J,J) = BETA*REAL(C(J,J)) + + + REAL(ALPHA*TEMP1+ + + CONJG(ALPHA)*TEMP2) + END IF + ELSE + IF (BETA.EQ.REAL(ZERO)) THEN + C(I,J) = ALPHA*TEMP1 + CONJG(ALPHA)*TEMP2 + ELSE + C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 + + + CONJG(ALPHA)*TEMP2 + END IF + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240 J = 1,N + DO 230 I = J,N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220 L = 1,K + TEMP1 = TEMP1 + CONJG(A(L,I))*B(L,J) + TEMP2 = TEMP2 + CONJG(B(L,I))*A(L,J) + 220 CONTINUE + IF (I.EQ.J) THEN + IF (BETA.EQ.REAL(ZERO)) THEN + C(J,J) = REAL(ALPHA*TEMP1+ + + CONJG(ALPHA)*TEMP2) + ELSE + C(J,J) = BETA*REAL(C(J,J)) + + + REAL(ALPHA*TEMP1+ + + CONJG(ALPHA)*TEMP2) + END IF + ELSE + IF (BETA.EQ.REAL(ZERO)) THEN + C(I,J) = ALPHA*TEMP1 + CONJG(ALPHA)*TEMP2 + ELSE + 
C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 + + + CONJG(ALPHA)*TEMP2 + END IF + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHER2K. +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/cherk.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/cherk.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,327 @@ + SUBROUTINE CHERK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC) +* .. Scalar Arguments .. + REAL ALPHA,BETA + INTEGER K,LDA,LDC,N + CHARACTER TRANS,UPLO +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),C(LDC,*) +* .. +* +* Purpose +* ======= +* +* CHERK performs one of the hermitian rank k operations +* +* C := alpha*A*conjg( A' ) + beta*C, +* +* or +* +* C := alpha*conjg( A' )*A + beta*C, +* +* where alpha and beta are real scalars, C is an n by n hermitian +* matrix and A is an n by k matrix in the first case and a k by n +* matrix in the second case. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*conjg( A' ) + beta*C. +* +* TRANS = 'C' or 'c' C := alpha*conjg( A' )*A + beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrix A, and on entry with +* TRANS = 'C' or 'c', K specifies the number of rows of the +* matrix A. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. 
+* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. 
+* +* -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. +* Ed Anderson, Cray Research Inc. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX,CONJG,MAX,REAL +* .. +* .. Local Scalars .. + COMPLEX TEMP + REAL RTEMP + INTEGER I,INFO,J,L,NROWA + LOGICAL UPPER +* .. +* .. Parameters .. + REAL ONE,ZERO + PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) +* .. +* +* Test the input parameters. +* + IF (LSAME(TRANS,'N')) THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME(UPLO,'U') +* + INFO = 0 + IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN + INFO = 1 + ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND. + + (.NOT.LSAME(TRANS,'C'))) THEN + INFO = 2 + ELSE IF (N.LT.0) THEN + INFO = 3 + ELSE IF (K.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 7 + ELSE IF (LDC.LT.MAX(1,N)) THEN + INFO = 10 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CHERK ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR. + + (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + IF (UPPER) THEN + IF (BETA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,J + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + DO 30 I = 1,J - 1 + C(I,J) = BETA*C(I,J) + 30 CONTINUE + C(J,J) = BETA*REAL(C(J,J)) + 40 CONTINUE + END IF + ELSE + IF (BETA.EQ.ZERO) THEN + DO 60 J = 1,N + DO 50 I = J,N + C(I,J) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1,N + C(J,J) = BETA*REAL(C(J,J)) + DO 70 I = J + 1,N + C(I,J) = BETA*C(I,J) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF (LSAME(TRANS,'N')) THEN +* +* Form C := alpha*A*conjg( A' ) + beta*C. 
+*
+          IF (UPPER) THEN
+              DO 130 J = 1,N
+                  IF (BETA.EQ.ZERO) THEN
+                      DO 90 I = 1,J
+                          C(I,J) = ZERO
+   90                 CONTINUE
+                  ELSE IF (BETA.NE.ONE) THEN
+                      DO 100 I = 1,J - 1
+                          C(I,J) = BETA*C(I,J)
+  100                 CONTINUE
+                      C(J,J) = BETA*REAL(C(J,J))
+                  ELSE
+                      C(J,J) = REAL(C(J,J))
+                  END IF
+                  DO 120 L = 1,K
+                      IF (A(J,L).NE.CMPLX(ZERO)) THEN
+                          TEMP = ALPHA*CONJG(A(J,L))
+                          DO 110 I = 1,J - 1
+                              C(I,J) = C(I,J) + TEMP*A(I,L)
+  110                     CONTINUE
+                          C(J,J) = REAL(C(J,J)) + REAL(TEMP*A(J,L))
+                      END IF
+  120             CONTINUE
+  130         CONTINUE
+          ELSE
+              DO 180 J = 1,N
+                  IF (BETA.EQ.ZERO) THEN
+                      DO 140 I = J,N
+                          C(I,J) = ZERO
+  140                 CONTINUE
+                  ELSE IF (BETA.NE.ONE) THEN
+                      C(J,J) = BETA*REAL(C(J,J))
+                      DO 150 I = J + 1,N
+                          C(I,J) = BETA*C(I,J)
+  150                 CONTINUE
+                  ELSE
+                      C(J,J) = REAL(C(J,J))
+                  END IF
+                  DO 170 L = 1,K
+                      IF (A(J,L).NE.CMPLX(ZERO)) THEN
+                          TEMP = ALPHA*CONJG(A(J,L))
+                          C(J,J) = REAL(C(J,J)) + REAL(TEMP*A(J,L))
+                          DO 160 I = J + 1,N
+                              C(I,J) = C(I,J) + TEMP*A(I,L)
+  160                     CONTINUE
+                      END IF
+  170             CONTINUE
+  180             CONTINUE
+          END IF
+      ELSE
+*
+*        Form  C := alpha*conjg( A' )*A + beta*C.
+* + IF (UPPER) THEN + DO 220 J = 1,N + DO 200 I = 1,J - 1 + TEMP = ZERO + DO 190 L = 1,K + TEMP = TEMP + CONJG(A(L,I))*A(L,J) + 190 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 200 CONTINUE + RTEMP = ZERO + DO 210 L = 1,K + RTEMP = RTEMP + CONJG(A(L,J))*A(L,J) + 210 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(J,J) = ALPHA*RTEMP + ELSE + C(J,J) = ALPHA*RTEMP + BETA*REAL(C(J,J)) + END IF + 220 CONTINUE + ELSE + DO 260 J = 1,N + RTEMP = ZERO + DO 230 L = 1,K + RTEMP = RTEMP + CONJG(A(L,J))*A(L,J) + 230 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(J,J) = ALPHA*RTEMP + ELSE + C(J,J) = ALPHA*RTEMP + BETA*REAL(C(J,J)) + END IF + DO 250 I = J + 1,N + TEMP = ZERO + DO 240 L = 1,K + TEMP = TEMP + CONJG(A(L,I))*A(L,J) + 240 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 250 CONTINUE + 260 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHERK . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/cscal.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/cscal.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,39 @@ + SUBROUTINE CSCAL(N,CA,CX,INCX) +* .. Scalar Arguments .. + COMPLEX CA + INTEGER INCX,N +* .. +* .. Array Arguments .. + COMPLEX CX(*) +* .. +* +* Purpose +* ======= +* +* scales a vector by a constant. +* jack dongarra, linpack, 3/11/78. +* modified 3/93 to return if incx .le. 0. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* +* .. Local Scalars .. + INTEGER I,NINCX +* .. + IF (N.LE.0 .OR. 
INCX.LE.0) RETURN + IF (INCX.EQ.1) GO TO 20 +* +* code for increment not equal to 1 +* + NINCX = N*INCX + DO 10 I = 1,NINCX,INCX + CX(I) = CA*CX(I) + 10 CONTINUE + RETURN +* +* code for increment equal to 1 +* + 20 DO 30 I = 1,N + CX(I) = CA*CX(I) + 30 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/csrot.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/csrot.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,95 @@ + SUBROUTINE CSROT( N, CX, INCX, CY, INCY, C, S ) +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + REAL C, S +* .. +* .. Array Arguments .. + COMPLEX CX( * ), CY( * ) +* .. +* +* Purpose +* ======= +* +* Applies a plane rotation, where the cos and sin (c and s) are real +* and the vectors cx and cy are complex. +* jack dongarra, linpack, 3/11/78. +* +* Arguments +* ========== +* +* N (input) INTEGER +* On entry, N specifies the order of the vectors cx and cy. +* N must be at least zero. +* Unchanged on exit. +* +* CX (input) COMPLEX array, dimension at least +* ( 1 + ( N - 1 )*abs( INCX ) ). +* Before entry, the incremented array CX must contain the n +* element vector cx. On exit, CX is overwritten by the updated +* vector cx. +* +* INCX (input) INTEGER +* On entry, INCX specifies the increment for the elements of +* CX. INCX must not be zero. +* Unchanged on exit. +* +* CY (input) COMPLEX array, dimension at least +* ( 1 + ( N - 1 )*abs( INCY ) ). +* Before entry, the incremented array CY must contain the n +* element vector cy. On exit, CY is overwritten by the updated +* vector cy. +* +* INCY (input) INTEGER +* On entry, INCY specifies the increment for the elements of +* CY. INCY must not be zero. +* Unchanged on exit. +* +* C (input) REAL +* On entry, C specifies the cosine, cos. +* Unchanged on exit. +* +* S (input) REAL +* On entry, S specifies the sine, sin. +* Unchanged on exit. +* +* ===================================================================== +* +* .. Local Scalars .. 
+ INTEGER I, IX, IY + COMPLEX CTEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 .AND. INCY.EQ.1 ) + $ GO TO 20 +* +* code for unequal increments or equal increments not equal +* to 1 +* + IX = 1 + IY = 1 + IF( INCX.LT.0 ) + $ IX = ( -N+1 )*INCX + 1 + IF( INCY.LT.0 ) + $ IY = ( -N+1 )*INCY + 1 + DO 10 I = 1, N + CTEMP = C*CX( IX ) + S*CY( IY ) + CY( IY ) = C*CY( IY ) - S*CX( IX ) + CX( IX ) = CTEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* code for both increments equal to 1 +* + 20 DO 30 I = 1, N + CTEMP = C*CX( I ) + S*CY( I ) + CY( I ) = C*CY( I ) - S*CX( I ) + CX( I ) = CTEMP + 30 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/csscal.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/csscal.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,42 @@ + SUBROUTINE CSSCAL(N,SA,CX,INCX) +* .. Scalar Arguments .. + REAL SA + INTEGER INCX,N +* .. +* .. Array Arguments .. + COMPLEX CX(*) +* .. +* +* Purpose +* ======= +* +* scales a complex vector by a real constant. +* jack dongarra, linpack, 3/11/78. +* modified 3/93 to return if incx .le. 0. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* +* .. Local Scalars .. + INTEGER I,NINCX +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG,CMPLX,REAL +* .. + IF (N.LE.0 .OR. INCX.LE.0) RETURN + IF (INCX.EQ.1) GO TO 20 +* +* code for increment not equal to 1 +* + NINCX = N*INCX + DO 10 I = 1,NINCX,INCX + CX(I) = CMPLX(SA*REAL(CX(I)),SA*AIMAG(CX(I))) + 10 CONTINUE + RETURN +* +* code for increment equal to 1 +* + 20 DO 30 I = 1,N + CX(I) = CMPLX(SA*REAL(CX(I)),SA*AIMAG(CX(I))) + 30 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/cswap.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/cswap.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,47 @@ + SUBROUTINE CSWAP(N,CX,INCX,CY,INCY) +* .. Scalar Arguments .. + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. 
+ COMPLEX CX(*),CY(*) +* .. +* +* Purpose +* ======= +* +* interchanges two vectors. +* jack dongarra, linpack, 3/11/78. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* +* .. Local Scalars .. + COMPLEX CTEMP + INTEGER I,IX,IY +* .. + IF (N.LE.0) RETURN + IF (INCX.EQ.1 .AND. INCY.EQ.1) GO TO 20 +* +* code for unequal increments or equal increments not equal +* to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO 10 I = 1,N + CTEMP = CX(IX) + CX(IX) = CY(IY) + CY(IY) = CTEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* code for both increments equal to 1 + 20 DO 30 I = 1,N + CTEMP = CX(I) + CX(I) = CY(I) + CY(I) = CTEMP + 30 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/ctbsv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/ctbsv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,367 @@ + SUBROUTINE CTBSV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX) +* .. Scalar Arguments .. + INTEGER INCX,K,LDA,N + CHARACTER DIAG,TRANS,UPLO +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),X(*) +* .. +* +* Purpose +* ======= +* +* CTBSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, or conjg( A' )*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular band matrix, with ( k + 1 ) +* diagonals. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. 
+* +* TRANS = 'C' or 'c' conjg( A' )*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. 
+* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER (ZERO= (0.0E+0,0.0E+0)) +* .. +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L + LOGICAL NOCONJ,NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG,MAX,MIN +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. + + .NOT.LSAME(TRANS,'C')) THEN + INFO = 2 + ELSE IF (.NOT.LSAME(DIAG,'U') .AND. 
.NOT.LSAME(DIAG,'N')) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (K.LT.0) THEN + INFO = 5 + ELSE IF (LDA.LT. (K+1)) THEN + INFO = 7 + ELSE IF (INCX.EQ.0) THEN + INFO = 9 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CTBSV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (N.EQ.0) RETURN +* + NOCONJ = LSAME(TRANS,'T') + NOUNIT = LSAME(DIAG,'N') +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF (INCX.LE.0) THEN + KX = 1 - (N-1)*INCX + ELSE IF (INCX.NE.1) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF (LSAME(TRANS,'N')) THEN +* +* Form x := inv( A )*x. +* + IF (LSAME(UPLO,'U')) THEN + KPLUS1 = K + 1 + IF (INCX.EQ.1) THEN + DO 20 J = N,1,-1 + IF (X(J).NE.ZERO) THEN + L = KPLUS1 - J + IF (NOUNIT) X(J) = X(J)/A(KPLUS1,J) + TEMP = X(J) + DO 10 I = J - 1,MAX(1,J-K),-1 + X(I) = X(I) - TEMP*A(L+I,J) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + KX = KX + (N-1)*INCX + JX = KX + DO 40 J = N,1,-1 + KX = KX - INCX + IF (X(JX).NE.ZERO) THEN + IX = KX + L = KPLUS1 - J + IF (NOUNIT) X(JX) = X(JX)/A(KPLUS1,J) + TEMP = X(JX) + DO 30 I = J - 1,MAX(1,J-K),-1 + X(IX) = X(IX) - TEMP*A(L+I,J) + IX = IX - INCX + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 60 J = 1,N + IF (X(J).NE.ZERO) THEN + L = 1 - J + IF (NOUNIT) X(J) = X(J)/A(1,J) + TEMP = X(J) + DO 50 I = J + 1,MIN(N,J+K) + X(I) = X(I) - TEMP*A(L+I,J) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1,N + KX = KX + INCX + IF (X(JX).NE.ZERO) THEN + IX = KX + L = 1 - J + IF (NOUNIT) X(JX) = X(JX)/A(1,J) + TEMP = X(JX) + DO 70 I = J + 1,MIN(N,J+K) + X(IX) = X(IX) - TEMP*A(L+I,J) + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x or x := inv( conjg( A') )*x. 
+* + IF (LSAME(UPLO,'U')) THEN + KPLUS1 = K + 1 + IF (INCX.EQ.1) THEN + DO 110 J = 1,N + TEMP = X(J) + L = KPLUS1 - J + IF (NOCONJ) THEN + DO 90 I = MAX(1,J-K),J - 1 + TEMP = TEMP - A(L+I,J)*X(I) + 90 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J) + ELSE + DO 100 I = MAX(1,J-K),J - 1 + TEMP = TEMP - CONJG(A(L+I,J))*X(I) + 100 CONTINUE + IF (NOUNIT) TEMP = TEMP/CONJG(A(KPLUS1,J)) + END IF + X(J) = TEMP + 110 CONTINUE + ELSE + JX = KX + DO 140 J = 1,N + TEMP = X(JX) + IX = KX + L = KPLUS1 - J + IF (NOCONJ) THEN + DO 120 I = MAX(1,J-K),J - 1 + TEMP = TEMP - A(L+I,J)*X(IX) + IX = IX + INCX + 120 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J) + ELSE + DO 130 I = MAX(1,J-K),J - 1 + TEMP = TEMP - CONJG(A(L+I,J))*X(IX) + IX = IX + INCX + 130 CONTINUE + IF (NOUNIT) TEMP = TEMP/CONJG(A(KPLUS1,J)) + END IF + X(JX) = TEMP + JX = JX + INCX + IF (J.GT.K) KX = KX + INCX + 140 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 170 J = N,1,-1 + TEMP = X(J) + L = 1 - J + IF (NOCONJ) THEN + DO 150 I = MIN(N,J+K),J + 1,-1 + TEMP = TEMP - A(L+I,J)*X(I) + 150 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(1,J) + ELSE + DO 160 I = MIN(N,J+K),J + 1,-1 + TEMP = TEMP - CONJG(A(L+I,J))*X(I) + 160 CONTINUE + IF (NOUNIT) TEMP = TEMP/CONJG(A(1,J)) + END IF + X(J) = TEMP + 170 CONTINUE + ELSE + KX = KX + (N-1)*INCX + JX = KX + DO 200 J = N,1,-1 + TEMP = X(JX) + IX = KX + L = 1 - J + IF (NOCONJ) THEN + DO 180 I = MIN(N,J+K),J + 1,-1 + TEMP = TEMP - A(L+I,J)*X(IX) + IX = IX - INCX + 180 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(1,J) + ELSE + DO 190 I = MIN(N,J+K),J + 1,-1 + TEMP = TEMP - CONJG(A(L+I,J))*X(IX) + IX = IX - INCX + 190 CONTINUE + IF (NOUNIT) TEMP = TEMP/CONJG(A(1,J)) + END IF + X(JX) = TEMP + JX = JX - INCX + IF ((N-J).GE.K) KX = KX - INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTBSV . 
+* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/ctrmm.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/ctrmm.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,383 @@ + SUBROUTINE CTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) +* .. Scalar Arguments .. + COMPLEX ALPHA + INTEGER LDA,LDB,M,N + CHARACTER DIAG,SIDE,TRANSA,UPLO +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),B(LDB,*) +* .. +* +* Purpose +* ======= +* +* CTRMM performs one of the matrix-matrix operations +* +* B := alpha*op( A )*B, or B := alpha*B*op( A ) +* +* where alpha is a scalar, B is an m by n matrix, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). +* +* Arguments +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) multiplies B from +* the left or right as follows: +* +* SIDE = 'L' or 'l' B := alpha*op( A )*B. +* +* SIDE = 'R' or 'r' B := alpha*B*op( A ). +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. 
+* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B, and on exit is overwritten by the +* transformed matrix. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. 
Intrinsic Functions .. + INTRINSIC CONJG,MAX +* .. +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I,INFO,J,K,NROWA + LOGICAL LSIDE,NOCONJ,NOUNIT,UPPER +* .. +* .. Parameters .. + COMPLEX ONE + PARAMETER (ONE= (1.0E+0,0.0E+0)) + COMPLEX ZERO + PARAMETER (ZERO= (0.0E+0,0.0E+0)) +* .. +* +* Test the input parameters. +* + LSIDE = LSAME(SIDE,'L') + IF (LSIDE) THEN + NROWA = M + ELSE + NROWA = N + END IF + NOCONJ = LSAME(TRANSA,'T') + NOUNIT = LSAME(DIAG,'N') + UPPER = LSAME(UPLO,'U') +* + INFO = 0 + IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN + INFO = 1 + ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN + INFO = 2 + ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND. + + (.NOT.LSAME(TRANSA,'T')) .AND. + + (.NOT.LSAME(TRANSA,'C'))) THEN + INFO = 3 + ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. (.NOT.LSAME(DIAG,'N'))) THEN + INFO = 4 + ELSE IF (M.LT.0) THEN + INFO = 5 + ELSE IF (N.LT.0) THEN + INFO = 6 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 9 + ELSE IF (LDB.LT.MAX(1,M)) THEN + INFO = 11 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CTRMM ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (M.EQ.0 .OR. N.EQ.0) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,M + B(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF (LSIDE) THEN + IF (LSAME(TRANSA,'N')) THEN +* +* Form B := alpha*A*B. +* + IF (UPPER) THEN + DO 50 J = 1,N + DO 40 K = 1,M + IF (B(K,J).NE.ZERO) THEN + TEMP = ALPHA*B(K,J) + DO 30 I = 1,K - 1 + B(I,J) = B(I,J) + TEMP*A(I,K) + 30 CONTINUE + IF (NOUNIT) TEMP = TEMP*A(K,K) + B(K,J) = TEMP + END IF + 40 CONTINUE + 50 CONTINUE + ELSE + DO 80 J = 1,N + DO 70 K = M,1,-1 + IF (B(K,J).NE.ZERO) THEN + TEMP = ALPHA*B(K,J) + B(K,J) = TEMP + IF (NOUNIT) B(K,J) = B(K,J)*A(K,K) + DO 60 I = K + 1,M + B(I,J) = B(I,J) + TEMP*A(I,K) + 60 CONTINUE + END IF + 70 CONTINUE + 80 CONTINUE + END IF + ELSE +* +* Form B := alpha*A'*B or B := alpha*conjg( A' )*B. 
+* + IF (UPPER) THEN + DO 120 J = 1,N + DO 110 I = M,1,-1 + TEMP = B(I,J) + IF (NOCONJ) THEN + IF (NOUNIT) TEMP = TEMP*A(I,I) + DO 90 K = 1,I - 1 + TEMP = TEMP + A(K,I)*B(K,J) + 90 CONTINUE + ELSE + IF (NOUNIT) TEMP = TEMP*CONJG(A(I,I)) + DO 100 K = 1,I - 1 + TEMP = TEMP + CONJG(A(K,I))*B(K,J) + 100 CONTINUE + END IF + B(I,J) = ALPHA*TEMP + 110 CONTINUE + 120 CONTINUE + ELSE + DO 160 J = 1,N + DO 150 I = 1,M + TEMP = B(I,J) + IF (NOCONJ) THEN + IF (NOUNIT) TEMP = TEMP*A(I,I) + DO 130 K = I + 1,M + TEMP = TEMP + A(K,I)*B(K,J) + 130 CONTINUE + ELSE + IF (NOUNIT) TEMP = TEMP*CONJG(A(I,I)) + DO 140 K = I + 1,M + TEMP = TEMP + CONJG(A(K,I))*B(K,J) + 140 CONTINUE + END IF + B(I,J) = ALPHA*TEMP + 150 CONTINUE + 160 CONTINUE + END IF + END IF + ELSE + IF (LSAME(TRANSA,'N')) THEN +* +* Form B := alpha*B*A. +* + IF (UPPER) THEN + DO 200 J = N,1,-1 + TEMP = ALPHA + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 170 I = 1,M + B(I,J) = TEMP*B(I,J) + 170 CONTINUE + DO 190 K = 1,J - 1 + IF (A(K,J).NE.ZERO) THEN + TEMP = ALPHA*A(K,J) + DO 180 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 180 CONTINUE + END IF + 190 CONTINUE + 200 CONTINUE + ELSE + DO 240 J = 1,N + TEMP = ALPHA + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 210 I = 1,M + B(I,J) = TEMP*B(I,J) + 210 CONTINUE + DO 230 K = J + 1,N + IF (A(K,J).NE.ZERO) THEN + TEMP = ALPHA*A(K,J) + DO 220 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 220 CONTINUE + END IF + 230 CONTINUE + 240 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*A' or B := alpha*B*conjg( A' ). 
+* + IF (UPPER) THEN + DO 280 K = 1,N + DO 260 J = 1,K - 1 + IF (A(J,K).NE.ZERO) THEN + IF (NOCONJ) THEN + TEMP = ALPHA*A(J,K) + ELSE + TEMP = ALPHA*CONJG(A(J,K)) + END IF + DO 250 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 250 CONTINUE + END IF + 260 CONTINUE + TEMP = ALPHA + IF (NOUNIT) THEN + IF (NOCONJ) THEN + TEMP = TEMP*A(K,K) + ELSE + TEMP = TEMP*CONJG(A(K,K)) + END IF + END IF + IF (TEMP.NE.ONE) THEN + DO 270 I = 1,M + B(I,K) = TEMP*B(I,K) + 270 CONTINUE + END IF + 280 CONTINUE + ELSE + DO 320 K = N,1,-1 + DO 300 J = K + 1,N + IF (A(J,K).NE.ZERO) THEN + IF (NOCONJ) THEN + TEMP = ALPHA*A(J,K) + ELSE + TEMP = ALPHA*CONJG(A(J,K)) + END IF + DO 290 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 290 CONTINUE + END IF + 300 CONTINUE + TEMP = ALPHA + IF (NOUNIT) THEN + IF (NOCONJ) THEN + TEMP = TEMP*A(K,K) + ELSE + TEMP = TEMP*CONJG(A(K,K)) + END IF + END IF + IF (TEMP.NE.ONE) THEN + DO 310 I = 1,M + B(I,K) = TEMP*B(I,K) + 310 CONTINUE + END IF + 320 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTRMM . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/ctrmv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/ctrmv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,309 @@ + SUBROUTINE CTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) +* .. Scalar Arguments .. + INTEGER INCX,LDA,N + CHARACTER DIAG,TRANS,UPLO +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),X(*) +* .. +* +* Purpose +* ======= +* +* CTRMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, or x := conjg( A' )*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. 
+* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := conjg( A' )*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* transformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. 
+ COMPLEX ZERO + PARAMETER (ZERO= (0.0E+0,0.0E+0)) +* .. +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I,INFO,IX,J,JX,KX + LOGICAL NOCONJ,NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG,MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. + + .NOT.LSAME(TRANS,'C')) THEN + INFO = 2 + ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 6 + ELSE IF (INCX.EQ.0) THEN + INFO = 8 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CTRMV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (N.EQ.0) RETURN +* + NOCONJ = LSAME(TRANS,'T') + NOUNIT = LSAME(DIAG,'N') +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF (INCX.LE.0) THEN + KX = 1 - (N-1)*INCX + ELSE IF (INCX.NE.1) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF (LSAME(TRANS,'N')) THEN +* +* Form x := A*x. 
+* + IF (LSAME(UPLO,'U')) THEN + IF (INCX.EQ.1) THEN + DO 20 J = 1,N + IF (X(J).NE.ZERO) THEN + TEMP = X(J) + DO 10 I = 1,J - 1 + X(I) = X(I) + TEMP*A(I,J) + 10 CONTINUE + IF (NOUNIT) X(J) = X(J)*A(J,J) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = X(JX) + IX = KX + DO 30 I = 1,J - 1 + X(IX) = X(IX) + TEMP*A(I,J) + IX = IX + INCX + 30 CONTINUE + IF (NOUNIT) X(JX) = X(JX)*A(J,J) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 60 J = N,1,-1 + IF (X(J).NE.ZERO) THEN + TEMP = X(J) + DO 50 I = N,J + 1,-1 + X(I) = X(I) + TEMP*A(I,J) + 50 CONTINUE + IF (NOUNIT) X(J) = X(J)*A(J,J) + END IF + 60 CONTINUE + ELSE + KX = KX + (N-1)*INCX + JX = KX + DO 80 J = N,1,-1 + IF (X(JX).NE.ZERO) THEN + TEMP = X(JX) + IX = KX + DO 70 I = N,J + 1,-1 + X(IX) = X(IX) + TEMP*A(I,J) + IX = IX - INCX + 70 CONTINUE + IF (NOUNIT) X(JX) = X(JX)*A(J,J) + END IF + JX = JX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x or x := conjg( A' )*x. 
+* + IF (LSAME(UPLO,'U')) THEN + IF (INCX.EQ.1) THEN + DO 110 J = N,1,-1 + TEMP = X(J) + IF (NOCONJ) THEN + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 90 I = J - 1,1,-1 + TEMP = TEMP + A(I,J)*X(I) + 90 CONTINUE + ELSE + IF (NOUNIT) TEMP = TEMP*CONJG(A(J,J)) + DO 100 I = J - 1,1,-1 + TEMP = TEMP + CONJG(A(I,J))*X(I) + 100 CONTINUE + END IF + X(J) = TEMP + 110 CONTINUE + ELSE + JX = KX + (N-1)*INCX + DO 140 J = N,1,-1 + TEMP = X(JX) + IX = JX + IF (NOCONJ) THEN + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 120 I = J - 1,1,-1 + IX = IX - INCX + TEMP = TEMP + A(I,J)*X(IX) + 120 CONTINUE + ELSE + IF (NOUNIT) TEMP = TEMP*CONJG(A(J,J)) + DO 130 I = J - 1,1,-1 + IX = IX - INCX + TEMP = TEMP + CONJG(A(I,J))*X(IX) + 130 CONTINUE + END IF + X(JX) = TEMP + JX = JX - INCX + 140 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 170 J = 1,N + TEMP = X(J) + IF (NOCONJ) THEN + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 150 I = J + 1,N + TEMP = TEMP + A(I,J)*X(I) + 150 CONTINUE + ELSE + IF (NOUNIT) TEMP = TEMP*CONJG(A(J,J)) + DO 160 I = J + 1,N + TEMP = TEMP + CONJG(A(I,J))*X(I) + 160 CONTINUE + END IF + X(J) = TEMP + 170 CONTINUE + ELSE + JX = KX + DO 200 J = 1,N + TEMP = X(JX) + IX = JX + IF (NOCONJ) THEN + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 180 I = J + 1,N + IX = IX + INCX + TEMP = TEMP + A(I,J)*X(IX) + 180 CONTINUE + ELSE + IF (NOUNIT) TEMP = TEMP*CONJG(A(J,J)) + DO 190 I = J + 1,N + IX = IX + INCX + TEMP = TEMP + CONJG(A(I,J))*X(IX) + 190 CONTINUE + END IF + X(JX) = TEMP + JX = JX + INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTRMV . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/ctrsm.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/ctrsm.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,407 @@ + SUBROUTINE CTRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) +* .. Scalar Arguments .. + COMPLEX ALPHA + INTEGER LDA,LDB,M,N + CHARACTER DIAG,SIDE,TRANSA,UPLO +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),B(LDB,*) +* .. 
+* +* Purpose +* ======= +* +* CTRSM solves one of the matrix equations +* +* op( A )*X = alpha*B, or X*op( A ) = alpha*B, +* +* where alpha is a scalar, X and B are m by n matrices, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). +* +* The matrix X is overwritten on B. +* +* Arguments +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) appears on the left +* or right of X as follows: +* +* SIDE = 'L' or 'l' op( A )*X = alpha*B. +* +* SIDE = 'R' or 'r' X*op( A ) = alpha*B. +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. 
+* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the right-hand side matrix B, and on exit is +* overwritten by the solution matrix X. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG,MAX +* .. +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I,INFO,J,K,NROWA + LOGICAL LSIDE,NOCONJ,NOUNIT,UPPER +* .. +* .. Parameters .. + COMPLEX ONE + PARAMETER (ONE= (1.0E+0,0.0E+0)) + COMPLEX ZERO + PARAMETER (ZERO= (0.0E+0,0.0E+0)) +* .. +* +* Test the input parameters. 
+* + LSIDE = LSAME(SIDE,'L') + IF (LSIDE) THEN + NROWA = M + ELSE + NROWA = N + END IF + NOCONJ = LSAME(TRANSA,'T') + NOUNIT = LSAME(DIAG,'N') + UPPER = LSAME(UPLO,'U') +* + INFO = 0 + IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN + INFO = 1 + ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN + INFO = 2 + ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND. + + (.NOT.LSAME(TRANSA,'T')) .AND. + + (.NOT.LSAME(TRANSA,'C'))) THEN + INFO = 3 + ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. (.NOT.LSAME(DIAG,'N'))) THEN + INFO = 4 + ELSE IF (M.LT.0) THEN + INFO = 5 + ELSE IF (N.LT.0) THEN + INFO = 6 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 9 + ELSE IF (LDB.LT.MAX(1,M)) THEN + INFO = 11 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CTRSM ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (M.EQ.0 .OR. N.EQ.0) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,M + B(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF (LSIDE) THEN + IF (LSAME(TRANSA,'N')) THEN +* +* Form B := alpha*inv( A )*B. +* + IF (UPPER) THEN + DO 60 J = 1,N + IF (ALPHA.NE.ONE) THEN + DO 30 I = 1,M + B(I,J) = ALPHA*B(I,J) + 30 CONTINUE + END IF + DO 50 K = M,1,-1 + IF (B(K,J).NE.ZERO) THEN + IF (NOUNIT) B(K,J) = B(K,J)/A(K,K) + DO 40 I = 1,K - 1 + B(I,J) = B(I,J) - B(K,J)*A(I,K) + 40 CONTINUE + END IF + 50 CONTINUE + 60 CONTINUE + ELSE + DO 100 J = 1,N + IF (ALPHA.NE.ONE) THEN + DO 70 I = 1,M + B(I,J) = ALPHA*B(I,J) + 70 CONTINUE + END IF + DO 90 K = 1,M + IF (B(K,J).NE.ZERO) THEN + IF (NOUNIT) B(K,J) = B(K,J)/A(K,K) + DO 80 I = K + 1,M + B(I,J) = B(I,J) - B(K,J)*A(I,K) + 80 CONTINUE + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form B := alpha*inv( A' )*B +* or B := alpha*inv( conjg( A' ) )*B. 
+* + IF (UPPER) THEN + DO 140 J = 1,N + DO 130 I = 1,M + TEMP = ALPHA*B(I,J) + IF (NOCONJ) THEN + DO 110 K = 1,I - 1 + TEMP = TEMP - A(K,I)*B(K,J) + 110 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(I,I) + ELSE + DO 120 K = 1,I - 1 + TEMP = TEMP - CONJG(A(K,I))*B(K,J) + 120 CONTINUE + IF (NOUNIT) TEMP = TEMP/CONJG(A(I,I)) + END IF + B(I,J) = TEMP + 130 CONTINUE + 140 CONTINUE + ELSE + DO 180 J = 1,N + DO 170 I = M,1,-1 + TEMP = ALPHA*B(I,J) + IF (NOCONJ) THEN + DO 150 K = I + 1,M + TEMP = TEMP - A(K,I)*B(K,J) + 150 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(I,I) + ELSE + DO 160 K = I + 1,M + TEMP = TEMP - CONJG(A(K,I))*B(K,J) + 160 CONTINUE + IF (NOUNIT) TEMP = TEMP/CONJG(A(I,I)) + END IF + B(I,J) = TEMP + 170 CONTINUE + 180 CONTINUE + END IF + END IF + ELSE + IF (LSAME(TRANSA,'N')) THEN +* +* Form B := alpha*B*inv( A ). +* + IF (UPPER) THEN + DO 230 J = 1,N + IF (ALPHA.NE.ONE) THEN + DO 190 I = 1,M + B(I,J) = ALPHA*B(I,J) + 190 CONTINUE + END IF + DO 210 K = 1,J - 1 + IF (A(K,J).NE.ZERO) THEN + DO 200 I = 1,M + B(I,J) = B(I,J) - A(K,J)*B(I,K) + 200 CONTINUE + END IF + 210 CONTINUE + IF (NOUNIT) THEN + TEMP = ONE/A(J,J) + DO 220 I = 1,M + B(I,J) = TEMP*B(I,J) + 220 CONTINUE + END IF + 230 CONTINUE + ELSE + DO 280 J = N,1,-1 + IF (ALPHA.NE.ONE) THEN + DO 240 I = 1,M + B(I,J) = ALPHA*B(I,J) + 240 CONTINUE + END IF + DO 260 K = J + 1,N + IF (A(K,J).NE.ZERO) THEN + DO 250 I = 1,M + B(I,J) = B(I,J) - A(K,J)*B(I,K) + 250 CONTINUE + END IF + 260 CONTINUE + IF (NOUNIT) THEN + TEMP = ONE/A(J,J) + DO 270 I = 1,M + B(I,J) = TEMP*B(I,J) + 270 CONTINUE + END IF + 280 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*inv( A' ) +* or B := alpha*B*inv( conjg( A' ) ). 
+* + IF (UPPER) THEN + DO 330 K = N,1,-1 + IF (NOUNIT) THEN + IF (NOCONJ) THEN + TEMP = ONE/A(K,K) + ELSE + TEMP = ONE/CONJG(A(K,K)) + END IF + DO 290 I = 1,M + B(I,K) = TEMP*B(I,K) + 290 CONTINUE + END IF + DO 310 J = 1,K - 1 + IF (A(J,K).NE.ZERO) THEN + IF (NOCONJ) THEN + TEMP = A(J,K) + ELSE + TEMP = CONJG(A(J,K)) + END IF + DO 300 I = 1,M + B(I,J) = B(I,J) - TEMP*B(I,K) + 300 CONTINUE + END IF + 310 CONTINUE + IF (ALPHA.NE.ONE) THEN + DO 320 I = 1,M + B(I,K) = ALPHA*B(I,K) + 320 CONTINUE + END IF + 330 CONTINUE + ELSE + DO 380 K = 1,N + IF (NOUNIT) THEN + IF (NOCONJ) THEN + TEMP = ONE/A(K,K) + ELSE + TEMP = ONE/CONJG(A(K,K)) + END IF + DO 340 I = 1,M + B(I,K) = TEMP*B(I,K) + 340 CONTINUE + END IF + DO 360 J = K + 1,N + IF (A(J,K).NE.ZERO) THEN + IF (NOCONJ) THEN + TEMP = A(J,K) + ELSE + TEMP = CONJG(A(J,K)) + END IF + DO 350 I = 1,M + B(I,J) = B(I,J) - TEMP*B(I,K) + 350 CONTINUE + END IF + 360 CONTINUE + IF (ALPHA.NE.ONE) THEN + DO 370 I = 1,M + B(I,K) = ALPHA*B(I,K) + 370 CONTINUE + END IF + 380 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTRSM . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/ctrsv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/ctrsv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,312 @@ + SUBROUTINE CTRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) +* .. Scalar Arguments .. + INTEGER INCX,LDA,N + CHARACTER DIAG,TRANS,UPLO +* .. +* .. Array Arguments .. + COMPLEX A(LDA,*),X(*) +* .. +* +* Purpose +* ======= +* +* CTRSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, or conjg( A' )*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. 
+* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' conjg( A' )*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. 
+* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER (ZERO= (0.0E+0,0.0E+0)) +* .. +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I,INFO,IX,J,JX,KX + LOGICAL NOCONJ,NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG,MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. + + .NOT.LSAME(TRANS,'C')) THEN + INFO = 2 + ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 6 + ELSE IF (INCX.EQ.0) THEN + INFO = 8 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('CTRSV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (N.EQ.0) RETURN +* + NOCONJ = LSAME(TRANS,'T') + NOUNIT = LSAME(DIAG,'N') +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF (INCX.LE.0) THEN + KX = 1 - (N-1)*INCX + ELSE IF (INCX.NE.1) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF (LSAME(TRANS,'N')) THEN +* +* Form x := inv( A )*x. 
+* + IF (LSAME(UPLO,'U')) THEN + IF (INCX.EQ.1) THEN + DO 20 J = N,1,-1 + IF (X(J).NE.ZERO) THEN + IF (NOUNIT) X(J) = X(J)/A(J,J) + TEMP = X(J) + DO 10 I = J - 1,1,-1 + X(I) = X(I) - TEMP*A(I,J) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + (N-1)*INCX + DO 40 J = N,1,-1 + IF (X(JX).NE.ZERO) THEN + IF (NOUNIT) X(JX) = X(JX)/A(J,J) + TEMP = X(JX) + IX = JX + DO 30 I = J - 1,1,-1 + IX = IX - INCX + X(IX) = X(IX) - TEMP*A(I,J) + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 60 J = 1,N + IF (X(J).NE.ZERO) THEN + IF (NOUNIT) X(J) = X(J)/A(J,J) + TEMP = X(J) + DO 50 I = J + 1,N + X(I) = X(I) - TEMP*A(I,J) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1,N + IF (X(JX).NE.ZERO) THEN + IF (NOUNIT) X(JX) = X(JX)/A(J,J) + TEMP = X(JX) + IX = JX + DO 70 I = J + 1,N + IX = IX + INCX + X(IX) = X(IX) - TEMP*A(I,J) + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. 
+* + IF (LSAME(UPLO,'U')) THEN + IF (INCX.EQ.1) THEN + DO 110 J = 1,N + TEMP = X(J) + IF (NOCONJ) THEN + DO 90 I = 1,J - 1 + TEMP = TEMP - A(I,J)*X(I) + 90 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(J,J) + ELSE + DO 100 I = 1,J - 1 + TEMP = TEMP - CONJG(A(I,J))*X(I) + 100 CONTINUE + IF (NOUNIT) TEMP = TEMP/CONJG(A(J,J)) + END IF + X(J) = TEMP + 110 CONTINUE + ELSE + JX = KX + DO 140 J = 1,N + IX = KX + TEMP = X(JX) + IF (NOCONJ) THEN + DO 120 I = 1,J - 1 + TEMP = TEMP - A(I,J)*X(IX) + IX = IX + INCX + 120 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(J,J) + ELSE + DO 130 I = 1,J - 1 + TEMP = TEMP - CONJG(A(I,J))*X(IX) + IX = IX + INCX + 130 CONTINUE + IF (NOUNIT) TEMP = TEMP/CONJG(A(J,J)) + END IF + X(JX) = TEMP + JX = JX + INCX + 140 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 170 J = N,1,-1 + TEMP = X(J) + IF (NOCONJ) THEN + DO 150 I = N,J + 1,-1 + TEMP = TEMP - A(I,J)*X(I) + 150 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(J,J) + ELSE + DO 160 I = N,J + 1,-1 + TEMP = TEMP - CONJG(A(I,J))*X(I) + 160 CONTINUE + IF (NOUNIT) TEMP = TEMP/CONJG(A(J,J)) + END IF + X(J) = TEMP + 170 CONTINUE + ELSE + KX = KX + (N-1)*INCX + JX = KX + DO 200 J = N,1,-1 + IX = KX + TEMP = X(JX) + IF (NOCONJ) THEN + DO 180 I = N,J + 1,-1 + TEMP = TEMP - A(I,J)*X(IX) + IX = IX - INCX + 180 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(J,J) + ELSE + DO 190 I = N,J + 1,-1 + TEMP = TEMP - CONJG(A(I,J))*X(IX) + IX = IX - INCX + 190 CONTINUE + IF (NOUNIT) TEMP = TEMP/CONJG(A(J,J)) + END IF + X(JX) = TEMP + JX = JX - INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTRSV . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/sasum.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/sasum.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,59 @@ + REAL FUNCTION SASUM(N,SX,INCX) +* .. Scalar Arguments .. + INTEGER INCX,N +* .. +* .. Array Arguments .. + REAL SX(*) +* .. +* +* Purpose +* ======= +* +* takes the sum of the absolute values. 
+* uses unrolled loops for increment equal to one. +* jack dongarra, linpack, 3/11/78. +* modified 3/93 to return if incx .le. 0. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* + +* .. Local Scalars .. + REAL STEMP + INTEGER I,M,MP1,NINCX +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS,MOD +* .. + SASUM = 0.0e0 + STEMP = 0.0e0 + IF (N.LE.0 .OR. INCX.LE.0) RETURN + IF (INCX.EQ.1) GO TO 20 +* +* code for increment not equal to 1 +* + NINCX = N*INCX + DO 10 I = 1,NINCX,INCX + STEMP = STEMP + ABS(SX(I)) + 10 CONTINUE + SASUM = STEMP + RETURN +* +* code for increment equal to 1 +* +* +* clean-up loop +* + 20 M = MOD(N,6) + IF (M.EQ.0) GO TO 40 + DO 30 I = 1,M + STEMP = STEMP + ABS(SX(I)) + 30 CONTINUE + IF (N.LT.6) GO TO 60 + 40 MP1 = M + 1 + DO 50 I = MP1,N,6 + STEMP = STEMP + ABS(SX(I)) + ABS(SX(I+1)) + ABS(SX(I+2)) + + + ABS(SX(I+3)) + ABS(SX(I+4)) + ABS(SX(I+5)) + 50 CONTINUE + 60 SASUM = STEMP + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/saxpy.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/saxpy.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,62 @@ + SUBROUTINE SAXPY(N,SA,SX,INCX,SY,INCY) +* .. Scalar Arguments .. + REAL SA + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + REAL SX(*),SY(*) +* .. +* +* Purpose +* ======= +* +* SAXPY constant times a vector plus a vector. +* uses unrolled loop for increments equal to one. +* jack dongarra, linpack, 3/11/78. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* +* .. Local Scalars .. + INTEGER I,IX,IY,M,MP1 +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. + IF (N.LE.0) RETURN + IF (SA.EQ.0.0) RETURN + IF (INCX.EQ.1 .AND. 
INCY.EQ.1) GO TO 20 +* +* code for unequal increments or equal increments +* not equal to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO 10 I = 1,N + SY(IY) = SY(IY) + SA*SX(IX) + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* code for both increments equal to 1 +* +* +* clean-up loop +* + 20 M = MOD(N,4) + IF (M.EQ.0) GO TO 40 + DO 30 I = 1,M + SY(I) = SY(I) + SA*SX(I) + 30 CONTINUE + IF (N.LT.4) RETURN + 40 MP1 = M + 1 + DO 50 I = MP1,N,4 + SY(I) = SY(I) + SA*SX(I) + SY(I+1) = SY(I+1) + SA*SX(I+1) + SY(I+2) = SY(I+2) + SA*SX(I+2) + SY(I+3) = SY(I+3) + SA*SX(I+3) + 50 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/scabs1.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/scabs1.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,16 @@ + REAL FUNCTION SCABS1(Z) +* .. Scalar Arguments .. + COMPLEX Z +* .. +* +* Purpose +* ======= +* +* SCABS1 computes absolute value of a complex number +* +* .. Intrinsic Functions .. + INTRINSIC ABS,AIMAG,REAL +* .. + SCABS1 = ABS(REAL(Z)) + ABS(AIMAG(Z)) + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/scasum.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/scasum.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,47 @@ + REAL FUNCTION SCASUM(N,CX,INCX) +* .. Scalar Arguments .. + INTEGER INCX,N +* .. +* .. Array Arguments .. + COMPLEX CX(*) +* .. +* +* Purpose +* ======= +* +* takes the sum of the absolute values of a complex vector and +* returns a single precision result. +* jack dongarra, linpack, 3/11/78. +* modified 3/93 to return if incx .le. 0. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* +* .. Local Scalars .. + REAL STEMP + INTEGER I,NINCX +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS,AIMAG,REAL +* .. + SCASUM = 0.0e0 + STEMP = 0.0e0 + IF (N.LE.0 .OR. 
INCX.LE.0) RETURN + IF (INCX.EQ.1) GO TO 20 +* +* code for increment not equal to 1 +* + NINCX = N*INCX + DO 10 I = 1,NINCX,INCX + STEMP = STEMP + ABS(REAL(CX(I))) + ABS(AIMAG(CX(I))) + 10 CONTINUE + SCASUM = STEMP + RETURN +* +* code for increment equal to 1 +* + 20 DO 30 I = 1,N + STEMP = STEMP + ABS(REAL(CX(I))) + ABS(AIMAG(CX(I))) + 30 CONTINUE + SCASUM = STEMP + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/scnrm2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/scnrm2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,72 @@ + REAL FUNCTION SCNRM2(N,X,INCX) +* .. Scalar Arguments .. + INTEGER INCX,N +* .. +* .. Array Arguments .. + COMPLEX X(*) +* .. +* +* Purpose +* ======= +* +* SCNRM2 returns the euclidean norm of a vector via the function +* name, so that +* +* SCNRM2 := sqrt( conjg( x' )*x ) +* +* +* +* -- This version written on 25-October-1982. +* Modified on 14-October-1993 to inline the call to CLASSQ. +* Sven Hammarling, Nag Ltd. +* +* +* .. Parameters .. + REAL ONE,ZERO + PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) +* .. +* .. Local Scalars .. + REAL NORM,SCALE,SSQ,TEMP + INTEGER IX +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS,AIMAG,REAL,SQRT +* .. + IF (N.LT.1 .OR. INCX.LT.1) THEN + NORM = ZERO + ELSE + SCALE = ZERO + SSQ = ONE +* The following loop is equivalent to this call to the LAPACK +* auxiliary routine: +* CALL CLASSQ( N, X, INCX, SCALE, SSQ ) +* + DO 10 IX = 1,1 + (N-1)*INCX,INCX + IF (REAL(X(IX)).NE.ZERO) THEN + TEMP = ABS(REAL(X(IX))) + IF (SCALE.LT.TEMP) THEN + SSQ = ONE + SSQ* (SCALE/TEMP)**2 + SCALE = TEMP + ELSE + SSQ = SSQ + (TEMP/SCALE)**2 + END IF + END IF + IF (AIMAG(X(IX)).NE.ZERO) THEN + TEMP = ABS(AIMAG(X(IX))) + IF (SCALE.LT.TEMP) THEN + SSQ = ONE + SSQ* (SCALE/TEMP)**2 + SCALE = TEMP + ELSE + SSQ = SSQ + (TEMP/SCALE)**2 + END IF + END IF + 10 CONTINUE + NORM = SCALE*SQRT(SSQ) + END IF +* + SCNRM2 = NORM + RETURN +* +* End of SCNRM2. 
+* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/scopy.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/scopy.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,63 @@ + SUBROUTINE SCOPY(N,SX,INCX,SY,INCY) +* .. Scalar Arguments .. + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + REAL SX(*),SY(*) +* .. +* +* Purpose +* ======= +* +* copies a vector, x, to a vector, y. +* uses unrolled loops for increments equal to 1. +* jack dongarra, linpack, 3/11/78. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* +* .. Local Scalars .. + INTEGER I,IX,IY,M,MP1 +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. + IF (N.LE.0) RETURN + IF (INCX.EQ.1 .AND. INCY.EQ.1) GO TO 20 +* +* code for unequal increments or equal increments +* not equal to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO 10 I = 1,N + SY(IY) = SX(IX) + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* code for both increments equal to 1 +* +* +* clean-up loop +* + 20 M = MOD(N,7) + IF (M.EQ.0) GO TO 40 + DO 30 I = 1,M + SY(I) = SX(I) + 30 CONTINUE + IF (N.LT.7) RETURN + 40 MP1 = M + 1 + DO 50 I = MP1,N,7 + SY(I) = SX(I) + SY(I+1) = SX(I+1) + SY(I+2) = SX(I+2) + SY(I+3) = SX(I+3) + SY(I+4) = SX(I+4) + SY(I+5) = SX(I+5) + SY(I+6) = SX(I+6) + 50 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/sdot.f --- a/libcruft/blas/sdot.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/blas/sdot.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,49 +1,64 @@ - real function sdot(n,sx,incx,sy,incy) -c -c forms the dot product of two vectors. -c uses unrolled loops for increments equal to one. -c jack dongarra, linpack, 3/11/78. 
-c modified 12/3/93, array(1) declarations changed to array(*) -c - real sx(*),sy(*),stemp - integer i,incx,incy,ix,iy,m,mp1,n -c - stemp = 0.0e0 - sdot = 0.0e0 - if(n.le.0)return - if(incx.eq.1.and.incy.eq.1)go to 20 -c -c code for unequal increments or equal increments -c not equal to 1 -c - ix = 1 - iy = 1 - if(incx.lt.0)ix = (-n+1)*incx + 1 - if(incy.lt.0)iy = (-n+1)*incy + 1 - do 10 i = 1,n - stemp = stemp + sx(ix)*sy(iy) - ix = ix + incx - iy = iy + incy - 10 continue - sdot = stemp - return -c -c code for both increments equal to 1 -c -c -c clean-up loop -c - 20 m = mod(n,5) - if( m .eq. 0 ) go to 40 - do 30 i = 1,m - stemp = stemp + sx(i)*sy(i) - 30 continue - if( n .lt. 5 ) go to 60 - 40 mp1 = m + 1 - do 50 i = mp1,n,5 - stemp = stemp + sx(i)*sy(i) + sx(i + 1)*sy(i + 1) + - * sx(i + 2)*sy(i + 2) + sx(i + 3)*sy(i + 3) + sx(i + 4)*sy(i + 4) - 50 continue - 60 sdot = stemp - return - end + REAL FUNCTION SDOT(N,SX,INCX,SY,INCY) +* .. Scalar Arguments .. + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + REAL SX(*),SY(*) +* .. +* +* Purpose +* ======= +* +* forms the dot product of two vectors. +* uses unrolled loops for increments equal to one. +* jack dongarra, linpack, 3/11/78. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* + +* .. Local Scalars .. + REAL STEMP + INTEGER I,IX,IY,M,MP1 +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. + STEMP = 0.0e0 + SDOT = 0.0e0 + IF (N.LE.0) RETURN + IF (INCX.EQ.1 .AND. 
INCY.EQ.1) GO TO 20 +* +* code for unequal increments or equal increments +* not equal to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO 10 I = 1,N + STEMP = STEMP + SX(IX)*SY(IY) + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + SDOT = STEMP + RETURN +* +* code for both increments equal to 1 +* +* +* clean-up loop +* + 20 M = MOD(N,5) + IF (M.EQ.0) GO TO 40 + DO 30 I = 1,M + STEMP = STEMP + SX(I)*SY(I) + 30 CONTINUE + IF (N.LT.5) GO TO 60 + 40 MP1 = M + 1 + DO 50 I = MP1,N,5 + STEMP = STEMP + SX(I)*SY(I) + SX(I+1)*SY(I+1) + + + SX(I+2)*SY(I+2) + SX(I+3)*SY(I+3) + SX(I+4)*SY(I+4) + 50 CONTINUE + 60 SDOT = STEMP + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/sgemm.f --- a/libcruft/blas/sgemm.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/blas/sgemm.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,11 +1,11 @@ - SUBROUTINE SGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, - $ BETA, C, LDC ) + SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) * .. Scalar Arguments .. - CHARACTER*1 TRANSA, TRANSB - INTEGER M, N, K, LDA, LDB, LDC - REAL ALPHA, BETA + REAL ALPHA,BETA + INTEGER K,LDA,LDB,LDC,M,N + CHARACTER TRANSA,TRANSB +* .. * .. Array Arguments .. - REAL A( LDA, * ), B( LDB, * ), C( LDC, * ) + REAL A(LDA,*),B(LDB,*),C(LDC,*) * .. * * Purpose @@ -22,7 +22,7 @@ * alpha and beta are scalars, and A, B and C are matrices, with op( A ) * an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. * -* Parameters +* Arguments * ========== * * TRANSA - CHARACTER*1. @@ -129,181 +129,181 @@ * * * .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME + LOGICAL LSAME + EXTERNAL LSAME +* .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL XERBLA +* .. * .. Intrinsic Functions .. - INTRINSIC MAX + INTRINSIC MAX +* .. * .. Local Scalars .. 
- LOGICAL NOTA, NOTB - INTEGER I, INFO, J, L, NCOLA, NROWA, NROWB - REAL TEMP + REAL TEMP + INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB + LOGICAL NOTA,NOTB +* .. * .. Parameters .. - REAL ONE , ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + REAL ONE,ZERO + PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) * .. -* .. Executable Statements .. * * Set NOTA and NOTB as true if A and B respectively are not * transposed and set NROWA, NCOLA and NROWB as the number of rows * and columns of A and the number of rows of B respectively. * - NOTA = LSAME( TRANSA, 'N' ) - NOTB = LSAME( TRANSB, 'N' ) - IF( NOTA )THEN - NROWA = M - NCOLA = K + NOTA = LSAME(TRANSA,'N') + NOTB = LSAME(TRANSB,'N') + IF (NOTA) THEN + NROWA = M + NCOLA = K ELSE - NROWA = K - NCOLA = M + NROWA = K + NCOLA = M END IF - IF( NOTB )THEN - NROWB = K + IF (NOTB) THEN + NROWB = K ELSE - NROWB = N + NROWB = N END IF * * Test the input parameters. * INFO = 0 - IF( ( .NOT.NOTA ).AND. - $ ( .NOT.LSAME( TRANSA, 'C' ) ).AND. - $ ( .NOT.LSAME( TRANSA, 'T' ) ) )THEN - INFO = 1 - ELSE IF( ( .NOT.NOTB ).AND. - $ ( .NOT.LSAME( TRANSB, 'C' ) ).AND. - $ ( .NOT.LSAME( TRANSB, 'T' ) ) )THEN - INFO = 2 - ELSE IF( M .LT.0 )THEN - INFO = 3 - ELSE IF( N .LT.0 )THEN - INFO = 4 - ELSE IF( K .LT.0 )THEN - INFO = 5 - ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN - INFO = 8 - ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN - INFO = 10 - ELSE IF( LDC.LT.MAX( 1, M ) )THEN - INFO = 13 + IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND. + + (.NOT.LSAME(TRANSA,'T'))) THEN + INFO = 1 + ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND. 
+ + (.NOT.LSAME(TRANSB,'T'))) THEN + INFO = 2 + ELSE IF (M.LT.0) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (K.LT.0) THEN + INFO = 5 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 8 + ELSE IF (LDB.LT.MAX(1,NROWB)) THEN + INFO = 10 + ELSE IF (LDC.LT.MAX(1,M)) THEN + INFO = 13 END IF - IF( INFO.NE.0 )THEN - CALL XERBLA( 'SGEMM ', INFO ) - RETURN + IF (INFO.NE.0) THEN + CALL XERBLA('SGEMM ',INFO) + RETURN END IF * * Quick return if possible. * - IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. - $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) - $ RETURN + IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN * * And if alpha.eq.zero. * - IF( ALPHA.EQ.ZERO )THEN - IF( BETA.EQ.ZERO )THEN - DO 20, J = 1, N - DO 10, I = 1, M - C( I, J ) = ZERO - 10 CONTINUE - 20 CONTINUE - ELSE - DO 40, J = 1, N - DO 30, I = 1, M - C( I, J ) = BETA*C( I, J ) - 30 CONTINUE - 40 CONTINUE - END IF - RETURN + IF (ALPHA.EQ.ZERO) THEN + IF (BETA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,M + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + DO 30 I = 1,M + C(I,J) = BETA*C(I,J) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN END IF * * Start the operations. * - IF( NOTB )THEN - IF( NOTA )THEN + IF (NOTB) THEN + IF (NOTA) THEN * * Form C := alpha*A*B + beta*C. 
* - DO 90, J = 1, N - IF( BETA.EQ.ZERO )THEN - DO 50, I = 1, M - C( I, J ) = ZERO - 50 CONTINUE - ELSE IF( BETA.NE.ONE )THEN - DO 60, I = 1, M - C( I, J ) = BETA*C( I, J ) - 60 CONTINUE - END IF - DO 80, L = 1, K - IF( B( L, J ).NE.ZERO )THEN - TEMP = ALPHA*B( L, J ) - DO 70, I = 1, M - C( I, J ) = C( I, J ) + TEMP*A( I, L ) - 70 CONTINUE + DO 90 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 50 I = 1,M + C(I,J) = ZERO + 50 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 60 I = 1,M + C(I,J) = BETA*C(I,J) + 60 CONTINUE END IF - 80 CONTINUE - 90 CONTINUE - ELSE + DO 80 L = 1,K + IF (B(L,J).NE.ZERO) THEN + TEMP = ALPHA*B(L,J) + DO 70 I = 1,M + C(I,J) = C(I,J) + TEMP*A(I,L) + 70 CONTINUE + END IF + 80 CONTINUE + 90 CONTINUE + ELSE * * Form C := alpha*A'*B + beta*C * - DO 120, J = 1, N - DO 110, I = 1, M - TEMP = ZERO - DO 100, L = 1, K - TEMP = TEMP + A( L, I )*B( L, J ) - 100 CONTINUE - IF( BETA.EQ.ZERO )THEN - C( I, J ) = ALPHA*TEMP - ELSE - C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) - END IF - 110 CONTINUE - 120 CONTINUE - END IF + DO 120 J = 1,N + DO 110 I = 1,M + TEMP = ZERO + DO 100 L = 1,K + TEMP = TEMP + A(L,I)*B(L,J) + 100 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 110 CONTINUE + 120 CONTINUE + END IF ELSE - IF( NOTA )THEN + IF (NOTA) THEN * * Form C := alpha*A*B' + beta*C * - DO 170, J = 1, N - IF( BETA.EQ.ZERO )THEN - DO 130, I = 1, M - C( I, J ) = ZERO - 130 CONTINUE - ELSE IF( BETA.NE.ONE )THEN - DO 140, I = 1, M - C( I, J ) = BETA*C( I, J ) - 140 CONTINUE - END IF - DO 160, L = 1, K - IF( B( J, L ).NE.ZERO )THEN - TEMP = ALPHA*B( J, L ) - DO 150, I = 1, M - C( I, J ) = C( I, J ) + TEMP*A( I, L ) - 150 CONTINUE + DO 170 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 130 I = 1,M + C(I,J) = ZERO + 130 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 140 I = 1,M + C(I,J) = BETA*C(I,J) + 140 CONTINUE END IF - 160 CONTINUE - 170 CONTINUE - ELSE + DO 160 L = 1,K + IF (B(J,L).NE.ZERO) THEN + TEMP = ALPHA*B(J,L) + DO 150 I = 1,M 
+ C(I,J) = C(I,J) + TEMP*A(I,L) + 150 CONTINUE + END IF + 160 CONTINUE + 170 CONTINUE + ELSE * * Form C := alpha*A'*B' + beta*C * - DO 200, J = 1, N - DO 190, I = 1, M - TEMP = ZERO - DO 180, L = 1, K - TEMP = TEMP + A( L, I )*B( J, L ) - 180 CONTINUE - IF( BETA.EQ.ZERO )THEN - C( I, J ) = ALPHA*TEMP - ELSE - C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) - END IF - 190 CONTINUE - 200 CONTINUE - END IF + DO 200 J = 1,N + DO 190 I = 1,M + TEMP = ZERO + DO 180 L = 1,K + TEMP = TEMP + A(L,I)*B(J,L) + 180 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 190 CONTINUE + 200 CONTINUE + END IF END IF * RETURN diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/sgemv.f --- a/libcruft/blas/sgemv.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/blas/sgemv.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,11 +1,11 @@ - SUBROUTINE SGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, - $ BETA, Y, INCY ) + SUBROUTINE SGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) * .. Scalar Arguments .. - REAL ALPHA, BETA - INTEGER INCX, INCY, LDA, M, N - CHARACTER*1 TRANS + REAL ALPHA,BETA + INTEGER INCX,INCY,LDA,M,N + CHARACTER TRANS +* .. * .. Array Arguments .. - REAL A( LDA, * ), X( * ), Y( * ) + REAL A(LDA,*),X(*),Y(*) * .. * * Purpose @@ -18,7 +18,7 @@ * where alpha and beta are scalars, x and y are vectors and A is an * m by n matrix. * -* Parameters +* Arguments * ========== * * TRANS - CHARACTER*1. @@ -100,69 +100,70 @@ * * * .. Parameters .. - REAL ONE , ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + REAL ONE,ZERO + PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) +* .. * .. Local Scalars .. - REAL TEMP - INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY + REAL TEMP + INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY +* .. * .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME + LOGICAL LSAME + EXTERNAL LSAME +* .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL XERBLA +* .. * .. Intrinsic Functions .. - INTRINSIC MAX + INTRINSIC MAX * .. -* .. 
Executable Statements .. * * Test the input parameters. * INFO = 0 - IF ( .NOT.LSAME( TRANS, 'N' ).AND. - $ .NOT.LSAME( TRANS, 'T' ).AND. - $ .NOT.LSAME( TRANS, 'C' ) )THEN - INFO = 1 - ELSE IF( M.LT.0 )THEN - INFO = 2 - ELSE IF( N.LT.0 )THEN - INFO = 3 - ELSE IF( LDA.LT.MAX( 1, M ) )THEN - INFO = 6 - ELSE IF( INCX.EQ.0 )THEN - INFO = 8 - ELSE IF( INCY.EQ.0 )THEN - INFO = 11 + IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. + + .NOT.LSAME(TRANS,'C')) THEN + INFO = 1 + ELSE IF (M.LT.0) THEN + INFO = 2 + ELSE IF (N.LT.0) THEN + INFO = 3 + ELSE IF (LDA.LT.MAX(1,M)) THEN + INFO = 6 + ELSE IF (INCX.EQ.0) THEN + INFO = 8 + ELSE IF (INCY.EQ.0) THEN + INFO = 11 END IF - IF( INFO.NE.0 )THEN - CALL XERBLA( 'SGEMV ', INFO ) - RETURN + IF (INFO.NE.0) THEN + CALL XERBLA('SGEMV ',INFO) + RETURN END IF * * Quick return if possible. * - IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. - $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) - $ RETURN + IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + + ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN * * Set LENX and LENY, the lengths of the vectors x and y, and set * up the start points in X and Y. * - IF( LSAME( TRANS, 'N' ) )THEN - LENX = N - LENY = M + IF (LSAME(TRANS,'N')) THEN + LENX = N + LENY = M ELSE - LENX = M - LENY = N + LENX = M + LENY = N END IF - IF( INCX.GT.0 )THEN - KX = 1 + IF (INCX.GT.0) THEN + KX = 1 ELSE - KX = 1 - ( LENX - 1 )*INCX + KX = 1 - (LENX-1)*INCX END IF - IF( INCY.GT.0 )THEN - KY = 1 + IF (INCY.GT.0) THEN + KY = 1 ELSE - KY = 1 - ( LENY - 1 )*INCY + KY = 1 - (LENY-1)*INCY END IF * * Start the operations. In this version the elements of A are @@ -170,88 +171,87 @@ * * First form y := beta*y. 
* - IF( BETA.NE.ONE )THEN - IF( INCY.EQ.1 )THEN - IF( BETA.EQ.ZERO )THEN - DO 10, I = 1, LENY - Y( I ) = ZERO - 10 CONTINUE - ELSE - DO 20, I = 1, LENY - Y( I ) = BETA*Y( I ) - 20 CONTINUE - END IF - ELSE - IY = KY - IF( BETA.EQ.ZERO )THEN - DO 30, I = 1, LENY - Y( IY ) = ZERO - IY = IY + INCY - 30 CONTINUE - ELSE - DO 40, I = 1, LENY - Y( IY ) = BETA*Y( IY ) - IY = IY + INCY - 40 CONTINUE - END IF - END IF + IF (BETA.NE.ONE) THEN + IF (INCY.EQ.1) THEN + IF (BETA.EQ.ZERO) THEN + DO 10 I = 1,LENY + Y(I) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1,LENY + Y(I) = BETA*Y(I) + 20 CONTINUE + END IF + ELSE + IY = KY + IF (BETA.EQ.ZERO) THEN + DO 30 I = 1,LENY + Y(IY) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1,LENY + Y(IY) = BETA*Y(IY) + IY = IY + INCY + 40 CONTINUE + END IF + END IF END IF - IF( ALPHA.EQ.ZERO ) - $ RETURN - IF( LSAME( TRANS, 'N' ) )THEN + IF (ALPHA.EQ.ZERO) RETURN + IF (LSAME(TRANS,'N')) THEN * * Form y := alpha*A*x + y. * - JX = KX - IF( INCY.EQ.1 )THEN - DO 60, J = 1, N - IF( X( JX ).NE.ZERO )THEN - TEMP = ALPHA*X( JX ) - DO 50, I = 1, M - Y( I ) = Y( I ) + TEMP*A( I, J ) - 50 CONTINUE - END IF - JX = JX + INCX - 60 CONTINUE - ELSE - DO 80, J = 1, N - IF( X( JX ).NE.ZERO )THEN - TEMP = ALPHA*X( JX ) - IY = KY - DO 70, I = 1, M - Y( IY ) = Y( IY ) + TEMP*A( I, J ) - IY = IY + INCY - 70 CONTINUE - END IF - JX = JX + INCX - 80 CONTINUE - END IF + JX = KX + IF (INCY.EQ.1) THEN + DO 60 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = ALPHA*X(JX) + DO 50 I = 1,M + Y(I) = Y(I) + TEMP*A(I,J) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = ALPHA*X(JX) + IY = KY + DO 70 I = 1,M + Y(IY) = Y(IY) + TEMP*A(I,J) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF ELSE * * Form y := alpha*A'*x + y. 
* - JY = KY - IF( INCX.EQ.1 )THEN - DO 100, J = 1, N - TEMP = ZERO - DO 90, I = 1, M - TEMP = TEMP + A( I, J )*X( I ) - 90 CONTINUE - Y( JY ) = Y( JY ) + ALPHA*TEMP - JY = JY + INCY - 100 CONTINUE - ELSE - DO 120, J = 1, N - TEMP = ZERO - IX = KX - DO 110, I = 1, M - TEMP = TEMP + A( I, J )*X( IX ) - IX = IX + INCX - 110 CONTINUE - Y( JY ) = Y( JY ) + ALPHA*TEMP - JY = JY + INCY - 120 CONTINUE - END IF + JY = KY + IF (INCX.EQ.1) THEN + DO 100 J = 1,N + TEMP = ZERO + DO 90 I = 1,M + TEMP = TEMP + A(I,J)*X(I) + 90 CONTINUE + Y(JY) = Y(JY) + ALPHA*TEMP + JY = JY + INCY + 100 CONTINUE + ELSE + DO 120 J = 1,N + TEMP = ZERO + IX = KX + DO 110 I = 1,M + TEMP = TEMP + A(I,J)*X(IX) + IX = IX + INCX + 110 CONTINUE + Y(JY) = Y(JY) + ALPHA*TEMP + JY = JY + INCY + 120 CONTINUE + END IF END IF * RETURN diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/sger.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/sger.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,159 @@ + SUBROUTINE SGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX,INCY,LDA,M,N +* .. +* .. Array Arguments .. + REAL A(LDA,*),X(*),Y(*) +* .. +* +* Purpose +* ======= +* +* SGER performs the rank 1 operation +* +* A := alpha*x*y' + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Arguments +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. 
+* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER (ZERO=0.0E+0) +* .. +* .. Local Scalars .. + REAL TEMP + INTEGER I,INFO,IX,J,JY,KX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (M.LT.0) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (INCX.EQ.0) THEN + INFO = 5 + ELSE IF (INCY.EQ.0) THEN + INFO = 7 + ELSE IF (LDA.LT.MAX(1,M)) THEN + INFO = 9 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('SGER ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. 
+* + IF (INCY.GT.0) THEN + JY = 1 + ELSE + JY = 1 - (N-1)*INCY + END IF + IF (INCX.EQ.1) THEN + DO 20 J = 1,N + IF (Y(JY).NE.ZERO) THEN + TEMP = ALPHA*Y(JY) + DO 10 I = 1,M + A(I,J) = A(I,J) + X(I)*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (M-1)*INCX + END IF + DO 40 J = 1,N + IF (Y(JY).NE.ZERO) THEN + TEMP = ALPHA*Y(JY) + IX = KX + DO 30 I = 1,M + A(I,J) = A(I,J) + X(IX)*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of SGER . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/smach.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/smach.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,59 @@ + real function smach(job) + integer job +c +c smach computes machine parameters of floating point +c arithmetic for use in testing only. not required by +c linpack proper. +c +c if trouble with automatic computation of these quantities, +c they can be set by direct assignment statements. +c assume the computer has +c +c b = base of arithmetic +c t = number of base b digits +c l = smallest possible exponent +c u = largest possible exponent +c +c then +c +c eps = b**(1-t) +c tiny = 100.0*b**(-l+t) +c huge = 0.01*b**(u-t) +c +c dmach same as smach except t, l, u apply to +c double precision. +c +c cmach same as smach except if complex division +c is done by +c +c 1/(x+i*y) = (x-i*y)/(x**2+y**2) +c +c then +c +c tiny = sqrt(tiny) +c huge = sqrt(huge) +c +c +c job is 1, 2 or 3 for epsilon, tiny and huge, respectively. +c +c + real eps,tiny,huge,s +c + eps = 1.0 + 10 eps = eps/2.0 + s = 1.0 + eps + if (s .gt. 1.0) go to 10 + eps = 2.0*eps +c + s = 1.0 + 20 tiny = s + s = s/16.0 + if (s*100. .ne. 0.0) go to 20 + tiny = (tiny/eps)*100.0 + huge = 1.0/tiny +c + if (job .eq. 1) smach = eps + if (job .eq. 2) smach = tiny + if (job .eq. 
3) smach = huge + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/snrm2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/snrm2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,66 @@ + REAL FUNCTION SNRM2(N,X,INCX) +* .. Scalar Arguments .. + INTEGER INCX,N +* .. +* .. Array Arguments .. + REAL X(*) +* .. +* +* Purpose +* ======= +* +* SNRM2 returns the euclidean norm of a vector via the function +* name, so that +* +* SNRM2 := sqrt( x'*x ). +* +* Further Details +* =============== +* +* -- This version written on 25-October-1982. +* Modified on 14-October-1993 to inline the call to SLASSQ. +* Sven Hammarling, Nag Ltd. +* +* +* .. Parameters .. + REAL ONE,ZERO + PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) +* .. +* .. Local Scalars .. + REAL ABSXI,NORM,SCALE,SSQ + INTEGER IX +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS,SQRT +* .. + IF (N.LT.1 .OR. INCX.LT.1) THEN + NORM = ZERO + ELSE IF (N.EQ.1) THEN + NORM = ABS(X(1)) + ELSE + SCALE = ZERO + SSQ = ONE +* The following loop is equivalent to this call to the LAPACK +* auxiliary routine: +* CALL SLASSQ( N, X, INCX, SCALE, SSQ ) +* + DO 10 IX = 1,1 + (N-1)*INCX,INCX + IF (X(IX).NE.ZERO) THEN + ABSXI = ABS(X(IX)) + IF (SCALE.LT.ABSXI) THEN + SSQ = ONE + SSQ* (SCALE/ABSXI)**2 + SCALE = ABSXI + ELSE + SSQ = SSQ + (ABSXI/SCALE)**2 + END IF + END IF + 10 CONTINUE + NORM = SCALE*SQRT(SSQ) + END IF +* + SNRM2 = NORM + RETURN +* +* End of SNRM2. +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/srot.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/srot.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,54 @@ + SUBROUTINE SROT(N,SX,INCX,SY,INCY,C,S) +* .. Scalar Arguments .. + REAL C,S + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + REAL SX(*),SY(*) +* .. +* +* Purpose +* ======= +* +* applies a plane rotation. +* +* Further Details +* =============== +* +* jack dongarra, linpack, 3/11/78. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* + +* .. 
Local Scalars .. + REAL STEMP + INTEGER I,IX,IY +* .. + IF (N.LE.0) RETURN + IF (INCX.EQ.1 .AND. INCY.EQ.1) GO TO 20 +* +* code for unequal increments or equal increments not equal +* to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO 10 I = 1,N + STEMP = C*SX(IX) + S*SY(IY) + SY(IY) = C*SY(IY) - S*SX(IX) + SX(IX) = STEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* code for both increments equal to 1 +* + 20 DO 30 I = 1,N + STEMP = C*SX(I) + S*SY(I) + SY(I) = C*SY(I) - S*SX(I) + SX(I) = STEMP + 30 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/sscal.f --- a/libcruft/blas/sscal.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/blas/sscal.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,43 +1,57 @@ - subroutine sscal(n,sa,sx,incx) -c -c scales a vector by a constant. -c uses unrolled loops for increment equal to 1. -c jack dongarra, linpack, 3/11/78. -c modified 3/93 to return if incx .le. 0. -c modified 12/3/93, array(1) declarations changed to array(*) -c - real sa,sx(*) - integer i,incx,m,mp1,n,nincx -c - if( n.le.0 .or. incx.le.0 )return - if(incx.eq.1)go to 20 -c -c code for increment not equal to 1 -c - nincx = n*incx - do 10 i = 1,nincx,incx - sx(i) = sa*sx(i) - 10 continue - return -c -c code for increment equal to 1 -c -c -c clean-up loop -c - 20 m = mod(n,5) - if( m .eq. 0 ) go to 40 - do 30 i = 1,m - sx(i) = sa*sx(i) - 30 continue - if( n .lt. 5 ) return - 40 mp1 = m + 1 - do 50 i = mp1,n,5 - sx(i) = sa*sx(i) - sx(i + 1) = sa*sx(i + 1) - sx(i + 2) = sa*sx(i + 2) - sx(i + 3) = sa*sx(i + 3) - sx(i + 4) = sa*sx(i + 4) - 50 continue - return - end + SUBROUTINE SSCAL(N,SA,SX,INCX) +* .. Scalar Arguments .. + REAL SA + INTEGER INCX,N +* .. +* .. Array Arguments .. + REAL SX(*) +* .. +* +* Purpose +* ======= +* +* scales a vector by a constant. +* uses unrolled loops for increment equal to 1. +* jack dongarra, linpack, 3/11/78. +* modified 3/93 to return if incx .le. 0. 
+* modified 12/3/93, array(1) declarations changed to array(*) +* +* +* .. Local Scalars .. + INTEGER I,M,MP1,NINCX +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. + IF (N.LE.0 .OR. INCX.LE.0) RETURN + IF (INCX.EQ.1) GO TO 20 +* +* code for increment not equal to 1 +* + NINCX = N*INCX + DO 10 I = 1,NINCX,INCX + SX(I) = SA*SX(I) + 10 CONTINUE + RETURN +* +* code for increment equal to 1 +* +* +* clean-up loop +* + 20 M = MOD(N,5) + IF (M.EQ.0) GO TO 40 + DO 30 I = 1,M + SX(I) = SA*SX(I) + 30 CONTINUE + IF (N.LT.5) RETURN + 40 MP1 = M + 1 + DO 50 I = MP1,N,5 + SX(I) = SA*SX(I) + SX(I+1) = SA*SX(I+1) + SX(I+2) = SA*SX(I+2) + SX(I+3) = SA*SX(I+3) + SX(I+4) = SA*SX(I+4) + 50 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/sswap.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/sswap.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,70 @@ + SUBROUTINE SSWAP(N,SX,INCX,SY,INCY) +* .. Scalar Arguments .. + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + REAL SX(*),SY(*) +* .. +* +* Purpose +* ======= +* +* interchanges two vectors. +* uses unrolled loops for increments equal to 1. +* jack dongarra, linpack, 3/11/78. +* modified 12/3/93, array(1) declarations changed to array(*) +* +* +* .. Local Scalars .. + REAL STEMP + INTEGER I,IX,IY,M,MP1 +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. + IF (N.LE.0) RETURN + IF (INCX.EQ.1 .AND. 
INCY.EQ.1) GO TO 20 +* +* code for unequal increments or equal increments not equal +* to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO 10 I = 1,N + STEMP = SX(IX) + SX(IX) = SY(IY) + SY(IY) = STEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* code for both increments equal to 1 +* +* +* clean-up loop +* + 20 M = MOD(N,3) + IF (M.EQ.0) GO TO 40 + DO 30 I = 1,M + STEMP = SX(I) + SX(I) = SY(I) + SY(I) = STEMP + 30 CONTINUE + IF (N.LT.3) RETURN + 40 MP1 = M + 1 + DO 50 I = MP1,N,3 + STEMP = SX(I) + SX(I) = SY(I) + SY(I) = STEMP + STEMP = SX(I+1) + SX(I+1) = SY(I+1) + SY(I+1) = STEMP + STEMP = SX(I+2) + SX(I+2) = SY(I+2) + SY(I+2) = STEMP + 50 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/ssymv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/ssymv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,262 @@ + SUBROUTINE SSYMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) +* .. Scalar Arguments .. + REAL ALPHA,BETA + INTEGER INCX,INCY,LDA,N + CHARACTER UPLO +* .. +* .. Array Arguments .. + REAL A(LDA,*),X(*),Y(*) +* .. +* +* Purpose +* ======= +* +* SSYMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric matrix. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). 
+* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ONE,ZERO + PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) +* .. +* .. Local Scalars .. + REAL TEMP1,TEMP2 + INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. 
+* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 5 + ELSE IF (INCX.EQ.0) THEN + INFO = 7 + ELSE IF (INCY.EQ.0) THEN + INFO = 10 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('SSYMV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN +* +* Set up the start points in X and Y. +* + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (N-1)*INCX + END IF + IF (INCY.GT.0) THEN + KY = 1 + ELSE + KY = 1 - (N-1)*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF (BETA.NE.ONE) THEN + IF (INCY.EQ.1) THEN + IF (BETA.EQ.ZERO) THEN + DO 10 I = 1,N + Y(I) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1,N + Y(I) = BETA*Y(I) + 20 CONTINUE + END IF + ELSE + IY = KY + IF (BETA.EQ.ZERO) THEN + DO 30 I = 1,N + Y(IY) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1,N + Y(IY) = BETA*Y(IY) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF (ALPHA.EQ.ZERO) RETURN + IF (LSAME(UPLO,'U')) THEN +* +* Form y when A is stored in upper triangle. +* + IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN + DO 60 J = 1,N + TEMP1 = ALPHA*X(J) + TEMP2 = ZERO + DO 50 I = 1,J - 1 + Y(I) = Y(I) + TEMP1*A(I,J) + TEMP2 = TEMP2 + A(I,J)*X(I) + 50 CONTINUE + Y(J) = Y(J) + TEMP1*A(J,J) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80 J = 1,N + TEMP1 = ALPHA*X(JX) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70 I = 1,J - 1 + Y(IY) = Y(IY) + TEMP1*A(I,J) + TEMP2 = TEMP2 + A(I,J)*X(IX) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y(JY) = Y(JY) + TEMP1*A(J,J) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. 
+* + IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN + DO 100 J = 1,N + TEMP1 = ALPHA*X(J) + TEMP2 = ZERO + Y(J) = Y(J) + TEMP1*A(J,J) + DO 90 I = J + 1,N + Y(I) = Y(I) + TEMP1*A(I,J) + TEMP2 = TEMP2 + A(I,J)*X(I) + 90 CONTINUE + Y(J) = Y(J) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120 J = 1,N + TEMP1 = ALPHA*X(JX) + TEMP2 = ZERO + Y(JY) = Y(JY) + TEMP1*A(J,J) + IX = JX + IY = JY + DO 110 I = J + 1,N + IX = IX + INCX + IY = IY + INCY + Y(IY) = Y(IY) + TEMP1*A(I,J) + TEMP2 = TEMP2 + A(I,J)*X(IX) + 110 CONTINUE + Y(JY) = Y(JY) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSYMV . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/ssyr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/ssyr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,199 @@ + SUBROUTINE SSYR(UPLO,N,ALPHA,X,INCX,A,LDA) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX,LDA,N + CHARACTER UPLO +* .. +* .. Array Arguments .. + REAL A(LDA,*),X(*) +* .. +* +* Purpose +* ======= +* +* SSYR performs the symmetric rank 1 operation +* +* A := alpha*x*x' + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n symmetric matrix. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. 
+* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER (ZERO=0.0E+0) +* .. +* .. Local Scalars .. + REAL TEMP + INTEGER I,INFO,IX,J,JX,KX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (INCX.EQ.0) THEN + INFO = 5 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 7 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('SSYR ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. 
(ALPHA.EQ.ZERO)) RETURN +* +* Set the start point in X if the increment is not unity. +* + IF (INCX.LE.0) THEN + KX = 1 - (N-1)*INCX + ELSE IF (INCX.NE.1) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF (LSAME(UPLO,'U')) THEN +* +* Form A when A is stored in upper triangle. +* + IF (INCX.EQ.1) THEN + DO 20 J = 1,N + IF (X(J).NE.ZERO) THEN + TEMP = ALPHA*X(J) + DO 10 I = 1,J + A(I,J) = A(I,J) + X(I)*TEMP + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = ALPHA*X(JX) + IX = KX + DO 30 I = 1,J + A(I,J) = A(I,J) + X(IX)*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in lower triangle. +* + IF (INCX.EQ.1) THEN + DO 60 J = 1,N + IF (X(J).NE.ZERO) THEN + TEMP = ALPHA*X(J) + DO 50 I = J,N + A(I,J) = A(I,J) + X(I)*TEMP + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = ALPHA*X(JX) + IX = JX + DO 70 I = J,N + A(I,J) = A(I,J) + X(IX)*TEMP + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSYR . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/ssyr2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/ssyr2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,230 @@ + SUBROUTINE SSYR2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX,INCY,LDA,N + CHARACTER UPLO +* .. +* .. Array Arguments .. + REAL A(LDA,*),X(*),Y(*) +* .. +* +* Purpose +* ======= +* +* SSYR2 performs the symmetric rank 2 operation +* +* A := alpha*x*y' + alpha*y*x' + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an n +* by n symmetric matrix. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. 
+* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* +* LDA - INTEGER. 
+* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER (ZERO=0.0E+0) +* .. +* .. Local Scalars .. + REAL TEMP1,TEMP2 + INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (INCX.EQ.0) THEN + INFO = 5 + ELSE IF (INCY.EQ.0) THEN + INFO = 7 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 9 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('SSYR2 ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (N-1)*INCX + END IF + IF (INCY.GT.0) THEN + KY = 1 + ELSE + KY = 1 - (N-1)*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF (LSAME(UPLO,'U')) THEN +* +* Form A when A is stored in the upper triangle. +* + IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN + DO 20 J = 1,N + IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN + TEMP1 = ALPHA*Y(J) + TEMP2 = ALPHA*X(J) + DO 10 I = 1,J + A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2 + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + DO 40 J = 1,N + IF ((X(JX).NE.ZERO) .OR. 
(Y(JY).NE.ZERO)) THEN + TEMP1 = ALPHA*Y(JY) + TEMP2 = ALPHA*X(JX) + IX = KX + IY = KY + DO 30 I = 1,J + A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in the lower triangle. +* + IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN + DO 60 J = 1,N + IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN + TEMP1 = ALPHA*Y(J) + TEMP2 = ALPHA*X(J) + DO 50 I = J,N + A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2 + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + DO 80 J = 1,N + IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN + TEMP1 = ALPHA*Y(JY) + TEMP2 = ALPHA*X(JX) + IX = JX + IY = JY + DO 70 I = J,N + A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2 + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSYR2 . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/ssyr2k.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/ssyr2k.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,326 @@ + SUBROUTINE SSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) +* .. Scalar Arguments .. + REAL ALPHA,BETA + INTEGER K,LDA,LDB,LDC,N + CHARACTER TRANS,UPLO +* .. +* .. Array Arguments .. + REAL A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* Purpose +* ======= +* +* SSYR2K performs one of the symmetric rank 2k operations +* +* C := alpha*A*B' + alpha*B*A' + beta*C, +* +* or +* +* C := alpha*A'*B + alpha*B'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A and B are n by k matrices in the first case and k by n +* matrices in the second case. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. 
+* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + +* beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + +* beta*C. +* +* TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + +* beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrices A and B, and on entry with +* TRANS = 'T' or 't' or 'C' or 'c', K specifies the number +* of rows of the matrices A and B. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* B - REAL array of DIMENSION ( LDB, kb ), where kb is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array B must contain the matrix B, otherwise +* the leading k by n part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. 
When TRANS = 'N' or 'n' +* then LDB must be at least max( 1, n ), otherwise LDB must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - REAL array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Local Scalars .. + REAL TEMP1,TEMP2 + INTEGER I,INFO,J,L,NROWA + LOGICAL UPPER +* .. +* .. Parameters .. + REAL ONE,ZERO + PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) +* .. +* +* Test the input parameters. +* + IF (LSAME(TRANS,'N')) THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME(UPLO,'U') +* + INFO = 0 + IF ((.NOT.UPPER) .AND. 
(.NOT.LSAME(UPLO,'L'))) THEN + INFO = 1 + ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND. + + (.NOT.LSAME(TRANS,'T')) .AND. + + (.NOT.LSAME(TRANS,'C'))) THEN + INFO = 2 + ELSE IF (N.LT.0) THEN + INFO = 3 + ELSE IF (K.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 7 + ELSE IF (LDB.LT.MAX(1,NROWA)) THEN + INFO = 9 + ELSE IF (LDC.LT.MAX(1,N)) THEN + INFO = 12 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('SSYR2K',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR. + + (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + IF (UPPER) THEN + IF (BETA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,J + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + DO 30 I = 1,J + C(I,J) = BETA*C(I,J) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF (BETA.EQ.ZERO) THEN + DO 60 J = 1,N + DO 50 I = J,N + C(I,J) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1,N + DO 70 I = J,N + C(I,J) = BETA*C(I,J) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF (LSAME(TRANS,'N')) THEN +* +* Form C := alpha*A*B' + alpha*B*A' + C. +* + IF (UPPER) THEN + DO 130 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 90 I = 1,J + C(I,J) = ZERO + 90 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 100 I = 1,J + C(I,J) = BETA*C(I,J) + 100 CONTINUE + END IF + DO 120 L = 1,K + IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN + TEMP1 = ALPHA*B(J,L) + TEMP2 = ALPHA*A(J,L) + DO 110 I = 1,J + C(I,J) = C(I,J) + A(I,L)*TEMP1 + + + B(I,L)*TEMP2 + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 140 I = J,N + C(I,J) = ZERO + 140 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 150 I = J,N + C(I,J) = BETA*C(I,J) + 150 CONTINUE + END IF + DO 170 L = 1,K + IF ((A(J,L).NE.ZERO) .OR. 
(B(J,L).NE.ZERO)) THEN + TEMP1 = ALPHA*B(J,L) + TEMP2 = ALPHA*A(J,L) + DO 160 I = J,N + C(I,J) = C(I,J) + A(I,L)*TEMP1 + + + B(I,L)*TEMP2 + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*B + alpha*B'*A + C. +* + IF (UPPER) THEN + DO 210 J = 1,N + DO 200 I = 1,J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190 L = 1,K + TEMP1 = TEMP1 + A(L,I)*B(L,J) + TEMP2 = TEMP2 + B(L,I)*A(L,J) + 190 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 + + + ALPHA*TEMP2 + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240 J = 1,N + DO 230 I = J,N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220 L = 1,K + TEMP1 = TEMP1 + A(L,I)*B(L,J) + TEMP2 = TEMP2 + B(L,I)*A(L,J) + 220 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 + + + ALPHA*TEMP2 + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSYR2K. +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/ssyrk.f --- a/libcruft/blas/ssyrk.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/blas/ssyrk.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,11 +1,11 @@ - SUBROUTINE SSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA, - $ BETA, C, LDC ) + SUBROUTINE SSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC) * .. Scalar Arguments .. - CHARACTER*1 UPLO, TRANS - INTEGER N, K, LDA, LDC - REAL ALPHA, BETA + REAL ALPHA,BETA + INTEGER K,LDA,LDC,N + CHARACTER TRANS,UPLO +* .. * .. Array Arguments .. - REAL A( LDA, * ), C( LDC, * ) + REAL A(LDA,*),C(LDC,*) * .. * * Purpose @@ -23,7 +23,7 @@ * and A is an n by k matrix in the first case and a k by n matrix * in the second case. * -* Parameters +* Arguments * ========== * * UPLO - CHARACTER*1. @@ -117,174 +117,175 @@ * * * .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME + LOGICAL LSAME + EXTERNAL LSAME +* .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL XERBLA +* .. * .. Intrinsic Functions .. 
- INTRINSIC MAX + INTRINSIC MAX +* .. * .. Local Scalars .. - LOGICAL UPPER - INTEGER I, INFO, J, L, NROWA - REAL TEMP + REAL TEMP + INTEGER I,INFO,J,L,NROWA + LOGICAL UPPER +* .. * .. Parameters .. - REAL ONE , ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + REAL ONE,ZERO + PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) * .. -* .. Executable Statements .. * * Test the input parameters. * - IF( LSAME( TRANS, 'N' ) )THEN - NROWA = N + IF (LSAME(TRANS,'N')) THEN + NROWA = N ELSE - NROWA = K + NROWA = K END IF - UPPER = LSAME( UPLO, 'U' ) + UPPER = LSAME(UPLO,'U') * INFO = 0 - IF( ( .NOT.UPPER ).AND. - $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN - INFO = 1 - ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. - $ ( .NOT.LSAME( TRANS, 'T' ) ).AND. - $ ( .NOT.LSAME( TRANS, 'C' ) ) )THEN - INFO = 2 - ELSE IF( N .LT.0 )THEN - INFO = 3 - ELSE IF( K .LT.0 )THEN - INFO = 4 - ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN - INFO = 7 - ELSE IF( LDC.LT.MAX( 1, N ) )THEN - INFO = 10 + IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN + INFO = 1 + ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND. + + (.NOT.LSAME(TRANS,'T')) .AND. + + (.NOT.LSAME(TRANS,'C'))) THEN + INFO = 2 + ELSE IF (N.LT.0) THEN + INFO = 3 + ELSE IF (K.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 7 + ELSE IF (LDC.LT.MAX(1,N)) THEN + INFO = 10 END IF - IF( INFO.NE.0 )THEN - CALL XERBLA( 'SSYRK ', INFO ) - RETURN + IF (INFO.NE.0) THEN + CALL XERBLA('SSYRK ',INFO) + RETURN END IF * * Quick return if possible. * - IF( ( N.EQ.0 ).OR. - $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) - $ RETURN + IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR. + + (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN * * And when alpha.eq.zero. 
* - IF( ALPHA.EQ.ZERO )THEN - IF( UPPER )THEN - IF( BETA.EQ.ZERO )THEN - DO 20, J = 1, N - DO 10, I = 1, J - C( I, J ) = ZERO - 10 CONTINUE - 20 CONTINUE - ELSE - DO 40, J = 1, N - DO 30, I = 1, J - C( I, J ) = BETA*C( I, J ) - 30 CONTINUE - 40 CONTINUE - END IF - ELSE - IF( BETA.EQ.ZERO )THEN - DO 60, J = 1, N - DO 50, I = J, N - C( I, J ) = ZERO - 50 CONTINUE - 60 CONTINUE - ELSE - DO 80, J = 1, N - DO 70, I = J, N - C( I, J ) = BETA*C( I, J ) - 70 CONTINUE - 80 CONTINUE - END IF - END IF - RETURN + IF (ALPHA.EQ.ZERO) THEN + IF (UPPER) THEN + IF (BETA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,J + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + DO 30 I = 1,J + C(I,J) = BETA*C(I,J) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF (BETA.EQ.ZERO) THEN + DO 60 J = 1,N + DO 50 I = J,N + C(I,J) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1,N + DO 70 I = J,N + C(I,J) = BETA*C(I,J) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN END IF * * Start the operations. * - IF( LSAME( TRANS, 'N' ) )THEN + IF (LSAME(TRANS,'N')) THEN * * Form C := alpha*A*A' + beta*C. 
* - IF( UPPER )THEN - DO 130, J = 1, N - IF( BETA.EQ.ZERO )THEN - DO 90, I = 1, J - C( I, J ) = ZERO - 90 CONTINUE - ELSE IF( BETA.NE.ONE )THEN - DO 100, I = 1, J - C( I, J ) = BETA*C( I, J ) - 100 CONTINUE - END IF - DO 120, L = 1, K - IF( A( J, L ).NE.ZERO )THEN - TEMP = ALPHA*A( J, L ) - DO 110, I = 1, J - C( I, J ) = C( I, J ) + TEMP*A( I, L ) - 110 CONTINUE + IF (UPPER) THEN + DO 130 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 90 I = 1,J + C(I,J) = ZERO + 90 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 100 I = 1,J + C(I,J) = BETA*C(I,J) + 100 CONTINUE END IF - 120 CONTINUE - 130 CONTINUE - ELSE - DO 180, J = 1, N - IF( BETA.EQ.ZERO )THEN - DO 140, I = J, N - C( I, J ) = ZERO - 140 CONTINUE - ELSE IF( BETA.NE.ONE )THEN - DO 150, I = J, N - C( I, J ) = BETA*C( I, J ) - 150 CONTINUE - END IF - DO 170, L = 1, K - IF( A( J, L ).NE.ZERO )THEN - TEMP = ALPHA*A( J, L ) - DO 160, I = J, N - C( I, J ) = C( I, J ) + TEMP*A( I, L ) - 160 CONTINUE + DO 120 L = 1,K + IF (A(J,L).NE.ZERO) THEN + TEMP = ALPHA*A(J,L) + DO 110 I = 1,J + C(I,J) = C(I,J) + TEMP*A(I,L) + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 140 I = J,N + C(I,J) = ZERO + 140 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 150 I = J,N + C(I,J) = BETA*C(I,J) + 150 CONTINUE END IF - 170 CONTINUE - 180 CONTINUE - END IF + DO 170 L = 1,K + IF (A(J,L).NE.ZERO) THEN + TEMP = ALPHA*A(J,L) + DO 160 I = J,N + C(I,J) = C(I,J) + TEMP*A(I,L) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF ELSE * * Form C := alpha*A'*A + beta*C. 
* - IF( UPPER )THEN - DO 210, J = 1, N - DO 200, I = 1, J - TEMP = ZERO - DO 190, L = 1, K - TEMP = TEMP + A( L, I )*A( L, J ) - 190 CONTINUE - IF( BETA.EQ.ZERO )THEN - C( I, J ) = ALPHA*TEMP - ELSE - C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) - END IF - 200 CONTINUE - 210 CONTINUE - ELSE - DO 240, J = 1, N - DO 230, I = J, N - TEMP = ZERO - DO 220, L = 1, K - TEMP = TEMP + A( L, I )*A( L, J ) - 220 CONTINUE - IF( BETA.EQ.ZERO )THEN - C( I, J ) = ALPHA*TEMP - ELSE - C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) - END IF - 230 CONTINUE - 240 CONTINUE - END IF + IF (UPPER) THEN + DO 210 J = 1,N + DO 200 I = 1,J + TEMP = ZERO + DO 190 L = 1,K + TEMP = TEMP + A(L,I)*A(L,J) + 190 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240 J = 1,N + DO 230 I = J,N + TEMP = ZERO + DO 220 L = 1,K + TEMP = TEMP + A(L,I)*A(L,J) + 220 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 230 CONTINUE + 240 CONTINUE + END IF END IF * RETURN diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/stbsv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/stbsv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,336 @@ + SUBROUTINE STBSV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX) +* .. Scalar Arguments .. + INTEGER INCX,K,LDA,N + CHARACTER DIAG,TRANS,UPLO +* .. +* .. Array Arguments .. + REAL A(LDA,*),X(*) +* .. +* +* Purpose +* ======= +* +* STBSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular band matrix, with ( k + 1 ) +* diagonals. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. 
+* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. 
+* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER (ZERO=0.0E+0) +* .. +* .. 
Local Scalars .. + REAL TEMP + INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX,MIN +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. + + .NOT.LSAME(TRANS,'C')) THEN + INFO = 2 + ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (K.LT.0) THEN + INFO = 5 + ELSE IF (LDA.LT. (K+1)) THEN + INFO = 7 + ELSE IF (INCX.EQ.0) THEN + INFO = 9 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('STBSV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (N.EQ.0) RETURN +* + NOUNIT = LSAME(DIAG,'N') +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF (INCX.LE.0) THEN + KX = 1 - (N-1)*INCX + ELSE IF (INCX.NE.1) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed by sequentially with one pass through A. +* + IF (LSAME(TRANS,'N')) THEN +* +* Form x := inv( A )*x. 
+* + IF (LSAME(UPLO,'U')) THEN + KPLUS1 = K + 1 + IF (INCX.EQ.1) THEN + DO 20 J = N,1,-1 + IF (X(J).NE.ZERO) THEN + L = KPLUS1 - J + IF (NOUNIT) X(J) = X(J)/A(KPLUS1,J) + TEMP = X(J) + DO 10 I = J - 1,MAX(1,J-K),-1 + X(I) = X(I) - TEMP*A(L+I,J) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + KX = KX + (N-1)*INCX + JX = KX + DO 40 J = N,1,-1 + KX = KX - INCX + IF (X(JX).NE.ZERO) THEN + IX = KX + L = KPLUS1 - J + IF (NOUNIT) X(JX) = X(JX)/A(KPLUS1,J) + TEMP = X(JX) + DO 30 I = J - 1,MAX(1,J-K),-1 + X(IX) = X(IX) - TEMP*A(L+I,J) + IX = IX - INCX + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 60 J = 1,N + IF (X(J).NE.ZERO) THEN + L = 1 - J + IF (NOUNIT) X(J) = X(J)/A(1,J) + TEMP = X(J) + DO 50 I = J + 1,MIN(N,J+K) + X(I) = X(I) - TEMP*A(L+I,J) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1,N + KX = KX + INCX + IF (X(JX).NE.ZERO) THEN + IX = KX + L = 1 - J + IF (NOUNIT) X(JX) = X(JX)/A(1,J) + TEMP = X(JX) + DO 70 I = J + 1,MIN(N,J+K) + X(IX) = X(IX) - TEMP*A(L+I,J) + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A')*x. 
+* + IF (LSAME(UPLO,'U')) THEN + KPLUS1 = K + 1 + IF (INCX.EQ.1) THEN + DO 100 J = 1,N + TEMP = X(J) + L = KPLUS1 - J + DO 90 I = MAX(1,J-K),J - 1 + TEMP = TEMP - A(L+I,J)*X(I) + 90 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J) + X(J) = TEMP + 100 CONTINUE + ELSE + JX = KX + DO 120 J = 1,N + TEMP = X(JX) + IX = KX + L = KPLUS1 - J + DO 110 I = MAX(1,J-K),J - 1 + TEMP = TEMP - A(L+I,J)*X(IX) + IX = IX + INCX + 110 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J) + X(JX) = TEMP + JX = JX + INCX + IF (J.GT.K) KX = KX + INCX + 120 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 140 J = N,1,-1 + TEMP = X(J) + L = 1 - J + DO 130 I = MIN(N,J+K),J + 1,-1 + TEMP = TEMP - A(L+I,J)*X(I) + 130 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(1,J) + X(J) = TEMP + 140 CONTINUE + ELSE + KX = KX + (N-1)*INCX + JX = KX + DO 160 J = N,1,-1 + TEMP = X(JX) + IX = KX + L = 1 - J + DO 150 I = MIN(N,J+K),J + 1,-1 + TEMP = TEMP - A(L+I,J)*X(IX) + IX = IX - INCX + 150 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(1,J) + X(JX) = TEMP + JX = JX - INCX + IF ((N-J).GE.K) KX = KX - INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STBSV . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/strmm.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/strmm.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,346 @@ + SUBROUTINE STRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER LDA,LDB,M,N + CHARACTER DIAG,SIDE,TRANSA,UPLO +* .. +* .. Array Arguments .. + REAL A(LDA,*),B(LDB,*) +* .. +* +* Purpose +* ======= +* +* STRMM performs one of the matrix-matrix operations +* +* B := alpha*op( A )*B, or B := alpha*B*op( A ), +* +* where alpha is a scalar, B is an m by n matrix, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A'. +* +* Arguments +* ========== +* +* SIDE - CHARACTER*1. 
+* On entry, SIDE specifies whether op( A ) multiplies B from +* the left or right as follows: +* +* SIDE = 'L' or 'l' B := alpha*op( A )*B. +* +* SIDE = 'R' or 'r' B := alpha*B*op( A ). +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = A'. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. 
+* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - REAL array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B, and on exit is overwritten by the +* transformed matrix. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Local Scalars .. + REAL TEMP + INTEGER I,INFO,J,K,NROWA + LOGICAL LSIDE,NOUNIT,UPPER +* .. +* .. Parameters .. + REAL ONE,ZERO + PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) +* .. +* +* Test the input parameters. +* + LSIDE = LSAME(SIDE,'L') + IF (LSIDE) THEN + NROWA = M + ELSE + NROWA = N + END IF + NOUNIT = LSAME(DIAG,'N') + UPPER = LSAME(UPLO,'U') +* + INFO = 0 + IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN + INFO = 1 + ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN + INFO = 2 + ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND. + + (.NOT.LSAME(TRANSA,'T')) .AND. + + (.NOT.LSAME(TRANSA,'C'))) THEN + INFO = 3 + ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. 
(.NOT.LSAME(DIAG,'N'))) THEN + INFO = 4 + ELSE IF (M.LT.0) THEN + INFO = 5 + ELSE IF (N.LT.0) THEN + INFO = 6 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 9 + ELSE IF (LDB.LT.MAX(1,M)) THEN + INFO = 11 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('STRMM ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (M.EQ.0 .OR. N.EQ.0) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,M + B(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF (LSIDE) THEN + IF (LSAME(TRANSA,'N')) THEN +* +* Form B := alpha*A*B. +* + IF (UPPER) THEN + DO 50 J = 1,N + DO 40 K = 1,M + IF (B(K,J).NE.ZERO) THEN + TEMP = ALPHA*B(K,J) + DO 30 I = 1,K - 1 + B(I,J) = B(I,J) + TEMP*A(I,K) + 30 CONTINUE + IF (NOUNIT) TEMP = TEMP*A(K,K) + B(K,J) = TEMP + END IF + 40 CONTINUE + 50 CONTINUE + ELSE + DO 80 J = 1,N + DO 70 K = M,1,-1 + IF (B(K,J).NE.ZERO) THEN + TEMP = ALPHA*B(K,J) + B(K,J) = TEMP + IF (NOUNIT) B(K,J) = B(K,J)*A(K,K) + DO 60 I = K + 1,M + B(I,J) = B(I,J) + TEMP*A(I,K) + 60 CONTINUE + END IF + 70 CONTINUE + 80 CONTINUE + END IF + ELSE +* +* Form B := alpha*A'*B. +* + IF (UPPER) THEN + DO 110 J = 1,N + DO 100 I = M,1,-1 + TEMP = B(I,J) + IF (NOUNIT) TEMP = TEMP*A(I,I) + DO 90 K = 1,I - 1 + TEMP = TEMP + A(K,I)*B(K,J) + 90 CONTINUE + B(I,J) = ALPHA*TEMP + 100 CONTINUE + 110 CONTINUE + ELSE + DO 140 J = 1,N + DO 130 I = 1,M + TEMP = B(I,J) + IF (NOUNIT) TEMP = TEMP*A(I,I) + DO 120 K = I + 1,M + TEMP = TEMP + A(K,I)*B(K,J) + 120 CONTINUE + B(I,J) = ALPHA*TEMP + 130 CONTINUE + 140 CONTINUE + END IF + END IF + ELSE + IF (LSAME(TRANSA,'N')) THEN +* +* Form B := alpha*B*A. 
+* + IF (UPPER) THEN + DO 180 J = N,1,-1 + TEMP = ALPHA + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 150 I = 1,M + B(I,J) = TEMP*B(I,J) + 150 CONTINUE + DO 170 K = 1,J - 1 + IF (A(K,J).NE.ZERO) THEN + TEMP = ALPHA*A(K,J) + DO 160 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + ELSE + DO 220 J = 1,N + TEMP = ALPHA + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 190 I = 1,M + B(I,J) = TEMP*B(I,J) + 190 CONTINUE + DO 210 K = J + 1,N + IF (A(K,J).NE.ZERO) THEN + TEMP = ALPHA*A(K,J) + DO 200 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 200 CONTINUE + END IF + 210 CONTINUE + 220 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*A'. +* + IF (UPPER) THEN + DO 260 K = 1,N + DO 240 J = 1,K - 1 + IF (A(J,K).NE.ZERO) THEN + TEMP = ALPHA*A(J,K) + DO 230 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 230 CONTINUE + END IF + 240 CONTINUE + TEMP = ALPHA + IF (NOUNIT) TEMP = TEMP*A(K,K) + IF (TEMP.NE.ONE) THEN + DO 250 I = 1,M + B(I,K) = TEMP*B(I,K) + 250 CONTINUE + END IF + 260 CONTINUE + ELSE + DO 300 K = N,1,-1 + DO 280 J = K + 1,N + IF (A(J,K).NE.ZERO) THEN + TEMP = ALPHA*A(J,K) + DO 270 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 270 CONTINUE + END IF + 280 CONTINUE + TEMP = ALPHA + IF (NOUNIT) TEMP = TEMP*A(K,K) + IF (TEMP.NE.ONE) THEN + DO 290 I = 1,M + B(I,K) = TEMP*B(I,K) + 290 CONTINUE + END IF + 300 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STRMM . +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/strmv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/strmv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,278 @@ + SUBROUTINE STRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) +* .. Scalar Arguments .. + INTEGER INCX,LDA,N + CHARACTER DIAG,TRANS,UPLO +* .. +* .. Array Arguments .. + REAL A(LDA,*),X(*) +* .. 
+* +* Purpose +* ======= +* +* STRMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := A'*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). 
+* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER (ZERO=0.0E+0) +* .. +* .. Local Scalars .. + REAL TEMP + INTEGER I,INFO,IX,J,JX,KX + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. + + .NOT.LSAME(TRANS,'C')) THEN + INFO = 2 + ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 6 + ELSE IF (INCX.EQ.0) THEN + INFO = 8 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('STRMV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (N.EQ.0) RETURN +* + NOUNIT = LSAME(DIAG,'N') +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF (INCX.LE.0) THEN + KX = 1 - (N-1)*INCX + ELSE IF (INCX.NE.1) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF (LSAME(TRANS,'N')) THEN +* +* Form x := A*x. 
+* + IF (LSAME(UPLO,'U')) THEN + IF (INCX.EQ.1) THEN + DO 20 J = 1,N + IF (X(J).NE.ZERO) THEN + TEMP = X(J) + DO 10 I = 1,J - 1 + X(I) = X(I) + TEMP*A(I,J) + 10 CONTINUE + IF (NOUNIT) X(J) = X(J)*A(J,J) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = X(JX) + IX = KX + DO 30 I = 1,J - 1 + X(IX) = X(IX) + TEMP*A(I,J) + IX = IX + INCX + 30 CONTINUE + IF (NOUNIT) X(JX) = X(JX)*A(J,J) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 60 J = N,1,-1 + IF (X(J).NE.ZERO) THEN + TEMP = X(J) + DO 50 I = N,J + 1,-1 + X(I) = X(I) + TEMP*A(I,J) + 50 CONTINUE + IF (NOUNIT) X(J) = X(J)*A(J,J) + END IF + 60 CONTINUE + ELSE + KX = KX + (N-1)*INCX + JX = KX + DO 80 J = N,1,-1 + IF (X(JX).NE.ZERO) THEN + TEMP = X(JX) + IX = KX + DO 70 I = N,J + 1,-1 + X(IX) = X(IX) + TEMP*A(I,J) + IX = IX - INCX + 70 CONTINUE + IF (NOUNIT) X(JX) = X(JX)*A(J,J) + END IF + JX = JX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x. +* + IF (LSAME(UPLO,'U')) THEN + IF (INCX.EQ.1) THEN + DO 100 J = N,1,-1 + TEMP = X(J) + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 90 I = J - 1,1,-1 + TEMP = TEMP + A(I,J)*X(I) + 90 CONTINUE + X(J) = TEMP + 100 CONTINUE + ELSE + JX = KX + (N-1)*INCX + DO 120 J = N,1,-1 + TEMP = X(JX) + IX = JX + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 110 I = J - 1,1,-1 + IX = IX - INCX + TEMP = TEMP + A(I,J)*X(IX) + 110 CONTINUE + X(JX) = TEMP + JX = JX - INCX + 120 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 140 J = 1,N + TEMP = X(J) + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 130 I = J + 1,N + TEMP = TEMP + A(I,J)*X(I) + 130 CONTINUE + X(J) = TEMP + 140 CONTINUE + ELSE + JX = KX + DO 160 J = 1,N + TEMP = X(JX) + IX = JX + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 150 I = J + 1,N + IX = IX + INCX + TEMP = TEMP + A(I,J)*X(IX) + 150 CONTINUE + X(JX) = TEMP + JX = JX + INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STRMV . 
+* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/strsm.f --- a/libcruft/blas/strsm.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/blas/strsm.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,11 +1,11 @@ - SUBROUTINE STRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, - $ B, LDB ) + SUBROUTINE STRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) * .. Scalar Arguments .. - CHARACTER*1 SIDE, UPLO, TRANSA, DIAG - INTEGER M, N, LDA, LDB - REAL ALPHA + REAL ALPHA + INTEGER LDA,LDB,M,N + CHARACTER DIAG,SIDE,TRANSA,UPLO +* .. * .. Array Arguments .. - REAL A( LDA, * ), B( LDB, * ) + REAL A(LDA,*),B(LDB,*) * .. * * Purpose @@ -22,7 +22,7 @@ * * The matrix X is overwritten on B. * -* Parameters +* Arguments * ========== * * SIDE - CHARACTER*1. @@ -128,247 +128,242 @@ * * * .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME + LOGICAL LSAME + EXTERNAL LSAME +* .. * .. External Subroutines .. - EXTERNAL XERBLA + EXTERNAL XERBLA +* .. * .. Intrinsic Functions .. - INTRINSIC MAX + INTRINSIC MAX +* .. * .. Local Scalars .. - LOGICAL LSIDE, NOUNIT, UPPER - INTEGER I, INFO, J, K, NROWA - REAL TEMP + REAL TEMP + INTEGER I,INFO,J,K,NROWA + LOGICAL LSIDE,NOUNIT,UPPER +* .. * .. Parameters .. - REAL ONE , ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + REAL ONE,ZERO + PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) * .. -* .. Executable Statements .. * * Test the input parameters. * - LSIDE = LSAME( SIDE , 'L' ) - IF( LSIDE )THEN - NROWA = M + LSIDE = LSAME(SIDE,'L') + IF (LSIDE) THEN + NROWA = M ELSE - NROWA = N + NROWA = N END IF - NOUNIT = LSAME( DIAG , 'N' ) - UPPER = LSAME( UPLO , 'U' ) + NOUNIT = LSAME(DIAG,'N') + UPPER = LSAME(UPLO,'U') * - INFO = 0 - IF( ( .NOT.LSIDE ).AND. - $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN - INFO = 1 - ELSE IF( ( .NOT.UPPER ).AND. - $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN - INFO = 2 - ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. - $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. - $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN - INFO = 3 - ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. 
- $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN - INFO = 4 - ELSE IF( M .LT.0 )THEN - INFO = 5 - ELSE IF( N .LT.0 )THEN - INFO = 6 - ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN - INFO = 9 - ELSE IF( LDB.LT.MAX( 1, M ) )THEN - INFO = 11 + INFO = 0 + IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN + INFO = 1 + ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN + INFO = 2 + ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND. + + (.NOT.LSAME(TRANSA,'T')) .AND. + + (.NOT.LSAME(TRANSA,'C'))) THEN + INFO = 3 + ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. (.NOT.LSAME(DIAG,'N'))) THEN + INFO = 4 + ELSE IF (M.LT.0) THEN + INFO = 5 + ELSE IF (N.LT.0) THEN + INFO = 6 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 9 + ELSE IF (LDB.LT.MAX(1,M)) THEN + INFO = 11 END IF - IF( INFO.NE.0 )THEN - CALL XERBLA( 'STRSM ', INFO ) - RETURN + IF (INFO.NE.0) THEN + CALL XERBLA('STRSM ',INFO) + RETURN END IF * * Quick return if possible. * - IF( N.EQ.0 ) - $ RETURN + IF (M.EQ.0 .OR. N.EQ.0) RETURN * * And when alpha.eq.zero. * - IF( ALPHA.EQ.ZERO )THEN - DO 20, J = 1, N - DO 10, I = 1, M - B( I, J ) = ZERO - 10 CONTINUE - 20 CONTINUE - RETURN + IF (ALPHA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,M + B(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN END IF * * Start the operations. * - IF( LSIDE )THEN - IF( LSAME( TRANSA, 'N' ) )THEN + IF (LSIDE) THEN + IF (LSAME(TRANSA,'N')) THEN * * Form B := alpha*inv( A )*B. 
* - IF( UPPER )THEN - DO 60, J = 1, N - IF( ALPHA.NE.ONE )THEN - DO 30, I = 1, M - B( I, J ) = ALPHA*B( I, J ) - 30 CONTINUE - END IF - DO 50, K = M, 1, -1 - IF( B( K, J ).NE.ZERO )THEN - IF( NOUNIT ) - $ B( K, J ) = B( K, J )/A( K, K ) - DO 40, I = 1, K - 1 - B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) - 40 CONTINUE - END IF - 50 CONTINUE - 60 CONTINUE - ELSE - DO 100, J = 1, N - IF( ALPHA.NE.ONE )THEN - DO 70, I = 1, M - B( I, J ) = ALPHA*B( I, J ) - 70 CONTINUE - END IF - DO 90 K = 1, M - IF( B( K, J ).NE.ZERO )THEN - IF( NOUNIT ) - $ B( K, J ) = B( K, J )/A( K, K ) - DO 80, I = K + 1, M - B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) - 80 CONTINUE - END IF - 90 CONTINUE - 100 CONTINUE - END IF - ELSE + IF (UPPER) THEN + DO 60 J = 1,N + IF (ALPHA.NE.ONE) THEN + DO 30 I = 1,M + B(I,J) = ALPHA*B(I,J) + 30 CONTINUE + END IF + DO 50 K = M,1,-1 + IF (B(K,J).NE.ZERO) THEN + IF (NOUNIT) B(K,J) = B(K,J)/A(K,K) + DO 40 I = 1,K - 1 + B(I,J) = B(I,J) - B(K,J)*A(I,K) + 40 CONTINUE + END IF + 50 CONTINUE + 60 CONTINUE + ELSE + DO 100 J = 1,N + IF (ALPHA.NE.ONE) THEN + DO 70 I = 1,M + B(I,J) = ALPHA*B(I,J) + 70 CONTINUE + END IF + DO 90 K = 1,M + IF (B(K,J).NE.ZERO) THEN + IF (NOUNIT) B(K,J) = B(K,J)/A(K,K) + DO 80 I = K + 1,M + B(I,J) = B(I,J) - B(K,J)*A(I,K) + 80 CONTINUE + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE * * Form B := alpha*inv( A' )*B. 
* - IF( UPPER )THEN - DO 130, J = 1, N - DO 120, I = 1, M - TEMP = ALPHA*B( I, J ) - DO 110, K = 1, I - 1 - TEMP = TEMP - A( K, I )*B( K, J ) - 110 CONTINUE - IF( NOUNIT ) - $ TEMP = TEMP/A( I, I ) - B( I, J ) = TEMP - 120 CONTINUE - 130 CONTINUE - ELSE - DO 160, J = 1, N - DO 150, I = M, 1, -1 - TEMP = ALPHA*B( I, J ) - DO 140, K = I + 1, M - TEMP = TEMP - A( K, I )*B( K, J ) - 140 CONTINUE - IF( NOUNIT ) - $ TEMP = TEMP/A( I, I ) - B( I, J ) = TEMP - 150 CONTINUE - 160 CONTINUE - END IF - END IF + IF (UPPER) THEN + DO 130 J = 1,N + DO 120 I = 1,M + TEMP = ALPHA*B(I,J) + DO 110 K = 1,I - 1 + TEMP = TEMP - A(K,I)*B(K,J) + 110 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(I,I) + B(I,J) = TEMP + 120 CONTINUE + 130 CONTINUE + ELSE + DO 160 J = 1,N + DO 150 I = M,1,-1 + TEMP = ALPHA*B(I,J) + DO 140 K = I + 1,M + TEMP = TEMP - A(K,I)*B(K,J) + 140 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(I,I) + B(I,J) = TEMP + 150 CONTINUE + 160 CONTINUE + END IF + END IF ELSE - IF( LSAME( TRANSA, 'N' ) )THEN + IF (LSAME(TRANSA,'N')) THEN * * Form B := alpha*B*inv( A ). 
* - IF( UPPER )THEN - DO 210, J = 1, N - IF( ALPHA.NE.ONE )THEN - DO 170, I = 1, M - B( I, J ) = ALPHA*B( I, J ) - 170 CONTINUE - END IF - DO 190, K = 1, J - 1 - IF( A( K, J ).NE.ZERO )THEN - DO 180, I = 1, M - B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) - 180 CONTINUE - END IF - 190 CONTINUE - IF( NOUNIT )THEN - TEMP = ONE/A( J, J ) - DO 200, I = 1, M - B( I, J ) = TEMP*B( I, J ) - 200 CONTINUE - END IF - 210 CONTINUE - ELSE - DO 260, J = N, 1, -1 - IF( ALPHA.NE.ONE )THEN - DO 220, I = 1, M - B( I, J ) = ALPHA*B( I, J ) - 220 CONTINUE - END IF - DO 240, K = J + 1, N - IF( A( K, J ).NE.ZERO )THEN - DO 230, I = 1, M - B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) - 230 CONTINUE - END IF - 240 CONTINUE - IF( NOUNIT )THEN - TEMP = ONE/A( J, J ) - DO 250, I = 1, M - B( I, J ) = TEMP*B( I, J ) - 250 CONTINUE - END IF - 260 CONTINUE - END IF - ELSE + IF (UPPER) THEN + DO 210 J = 1,N + IF (ALPHA.NE.ONE) THEN + DO 170 I = 1,M + B(I,J) = ALPHA*B(I,J) + 170 CONTINUE + END IF + DO 190 K = 1,J - 1 + IF (A(K,J).NE.ZERO) THEN + DO 180 I = 1,M + B(I,J) = B(I,J) - A(K,J)*B(I,K) + 180 CONTINUE + END IF + 190 CONTINUE + IF (NOUNIT) THEN + TEMP = ONE/A(J,J) + DO 200 I = 1,M + B(I,J) = TEMP*B(I,J) + 200 CONTINUE + END IF + 210 CONTINUE + ELSE + DO 260 J = N,1,-1 + IF (ALPHA.NE.ONE) THEN + DO 220 I = 1,M + B(I,J) = ALPHA*B(I,J) + 220 CONTINUE + END IF + DO 240 K = J + 1,N + IF (A(K,J).NE.ZERO) THEN + DO 230 I = 1,M + B(I,J) = B(I,J) - A(K,J)*B(I,K) + 230 CONTINUE + END IF + 240 CONTINUE + IF (NOUNIT) THEN + TEMP = ONE/A(J,J) + DO 250 I = 1,M + B(I,J) = TEMP*B(I,J) + 250 CONTINUE + END IF + 260 CONTINUE + END IF + ELSE * * Form B := alpha*B*inv( A' ). 
* - IF( UPPER )THEN - DO 310, K = N, 1, -1 - IF( NOUNIT )THEN - TEMP = ONE/A( K, K ) - DO 270, I = 1, M - B( I, K ) = TEMP*B( I, K ) - 270 CONTINUE - END IF - DO 290, J = 1, K - 1 - IF( A( J, K ).NE.ZERO )THEN - TEMP = A( J, K ) - DO 280, I = 1, M - B( I, J ) = B( I, J ) - TEMP*B( I, K ) - 280 CONTINUE - END IF - 290 CONTINUE - IF( ALPHA.NE.ONE )THEN - DO 300, I = 1, M - B( I, K ) = ALPHA*B( I, K ) - 300 CONTINUE - END IF - 310 CONTINUE - ELSE - DO 360, K = 1, N - IF( NOUNIT )THEN - TEMP = ONE/A( K, K ) - DO 320, I = 1, M - B( I, K ) = TEMP*B( I, K ) - 320 CONTINUE - END IF - DO 340, J = K + 1, N - IF( A( J, K ).NE.ZERO )THEN - TEMP = A( J, K ) - DO 330, I = 1, M - B( I, J ) = B( I, J ) - TEMP*B( I, K ) - 330 CONTINUE - END IF - 340 CONTINUE - IF( ALPHA.NE.ONE )THEN - DO 350, I = 1, M - B( I, K ) = ALPHA*B( I, K ) - 350 CONTINUE - END IF - 360 CONTINUE - END IF - END IF + IF (UPPER) THEN + DO 310 K = N,1,-1 + IF (NOUNIT) THEN + TEMP = ONE/A(K,K) + DO 270 I = 1,M + B(I,K) = TEMP*B(I,K) + 270 CONTINUE + END IF + DO 290 J = 1,K - 1 + IF (A(J,K).NE.ZERO) THEN + TEMP = A(J,K) + DO 280 I = 1,M + B(I,J) = B(I,J) - TEMP*B(I,K) + 280 CONTINUE + END IF + 290 CONTINUE + IF (ALPHA.NE.ONE) THEN + DO 300 I = 1,M + B(I,K) = ALPHA*B(I,K) + 300 CONTINUE + END IF + 310 CONTINUE + ELSE + DO 360 K = 1,N + IF (NOUNIT) THEN + TEMP = ONE/A(K,K) + DO 320 I = 1,M + B(I,K) = TEMP*B(I,K) + 320 CONTINUE + END IF + DO 340 J = K + 1,N + IF (A(J,K).NE.ZERO) THEN + TEMP = A(J,K) + DO 330 I = 1,M + B(I,J) = B(I,J) - TEMP*B(I,K) + 330 CONTINUE + END IF + 340 CONTINUE + IF (ALPHA.NE.ONE) THEN + DO 350 I = 1,M + B(I,K) = ALPHA*B(I,K) + 350 CONTINUE + END IF + 360 CONTINUE + END IF + END IF END IF * RETURN diff -r 45f5faba05a2 -r 82be108cc558 libcruft/blas/strsv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/blas/strsv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,281 @@ + SUBROUTINE STRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) +* .. Scalar Arguments .. 
+ INTEGER INCX,LDA,N + CHARACTER DIAG,TRANS,UPLO +* .. +* .. Array Arguments .. + REAL A(LDA,*),X(*) +* .. +* +* Purpose +* ======= +* +* STRSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. 
+* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER (ZERO=0.0E+0) +* .. +* .. Local Scalars .. + REAL TEMP + INTEGER I,INFO,IX,J,JX,KX + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. + + .NOT.LSAME(TRANS,'C')) THEN + INFO = 2 + ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 6 + ELSE IF (INCX.EQ.0) THEN + INFO = 8 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('STRSV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (N.EQ.0) RETURN +* + NOUNIT = LSAME(DIAG,'N') +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF (INCX.LE.0) THEN + KX = 1 - (N-1)*INCX + ELSE IF (INCX.NE.1) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. 
+* + IF (LSAME(TRANS,'N')) THEN +* +* Form x := inv( A )*x. +* + IF (LSAME(UPLO,'U')) THEN + IF (INCX.EQ.1) THEN + DO 20 J = N,1,-1 + IF (X(J).NE.ZERO) THEN + IF (NOUNIT) X(J) = X(J)/A(J,J) + TEMP = X(J) + DO 10 I = J - 1,1,-1 + X(I) = X(I) - TEMP*A(I,J) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + (N-1)*INCX + DO 40 J = N,1,-1 + IF (X(JX).NE.ZERO) THEN + IF (NOUNIT) X(JX) = X(JX)/A(J,J) + TEMP = X(JX) + IX = JX + DO 30 I = J - 1,1,-1 + IX = IX - INCX + X(IX) = X(IX) - TEMP*A(I,J) + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 60 J = 1,N + IF (X(J).NE.ZERO) THEN + IF (NOUNIT) X(J) = X(J)/A(J,J) + TEMP = X(J) + DO 50 I = J + 1,N + X(I) = X(I) - TEMP*A(I,J) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1,N + IF (X(JX).NE.ZERO) THEN + IF (NOUNIT) X(JX) = X(JX)/A(J,J) + TEMP = X(JX) + IX = JX + DO 70 I = J + 1,N + IX = IX + INCX + X(IX) = X(IX) - TEMP*A(I,J) + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x. +* + IF (LSAME(UPLO,'U')) THEN + IF (INCX.EQ.1) THEN + DO 100 J = 1,N + TEMP = X(J) + DO 90 I = 1,J - 1 + TEMP = TEMP - A(I,J)*X(I) + 90 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(J,J) + X(J) = TEMP + 100 CONTINUE + ELSE + JX = KX + DO 120 J = 1,N + TEMP = X(JX) + IX = KX + DO 110 I = 1,J - 1 + TEMP = TEMP - A(I,J)*X(IX) + IX = IX + INCX + 110 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(J,J) + X(JX) = TEMP + JX = JX + INCX + 120 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 140 J = N,1,-1 + TEMP = X(J) + DO 130 I = N,J + 1,-1 + TEMP = TEMP - A(I,J)*X(I) + 130 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(J,J) + X(J) = TEMP + 140 CONTINUE + ELSE + KX = KX + (N-1)*INCX + JX = KX + DO 160 J = N,1,-1 + TEMP = X(JX) + IX = KX + DO 150 I = N,J + 1,-1 + TEMP = TEMP - A(I,J)*X(IX) + IX = IX - INCX + 150 CONTINUE + IF (NOUNIT) TEMP = TEMP/A(J,J) + X(JX) = TEMP + JX = JX - INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STRSV . 
+* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/Makefile.in --- a/libcruft/fftpack/Makefile.in Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/Makefile.in Sun Apr 27 22:34:17 2008 +0200 @@ -28,7 +28,9 @@ FSRC = cfftb.f cfftb1.f cfftf.f cfftf1.f cffti.f cffti1.f passb.f \ passb2.f passb3.f passb4.f passb5.f passf.f passf2.f passf3.f \ - passf4.f passf5.f + passf4.f passf5.f zfftb.f zfftb1.f zfftf.f zfftf1.f zffti.f zffti1.f \ + zpassb.f zpassb2.f zpassb3.f zpassb4.f zpassb5.f zpassf.f zpassf2.f \ + zpassf3.f zpassf4.f zpassf5.f include $(TOPDIR)/Makeconf diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/cfftb.f --- a/libcruft/fftpack/cfftb.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/cfftb.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,5 +1,4 @@ subroutine cfftb (n,c,wsave) - implicit double precision (a-h,o-z) dimension c(*) ,wsave(*) if (n .eq. 1) return iw1 = n+n+1 diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/cfftb1.f --- a/libcruft/fftpack/cfftb1.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/cfftb1.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,5 +1,4 @@ subroutine cfftb1 (n,c,ch,wa,ifac) - implicit double precision (a-h,o-z) dimension ch(*) ,c(*) ,wa(*) ,ifac(*) nf = ifac(2) na = 0 diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/cfftf.f --- a/libcruft/fftpack/cfftf.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/cfftf.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,5 +1,4 @@ subroutine cfftf (n,c,wsave) - implicit double precision (a-h,o-z) dimension c(*) ,wsave(*) if (n .eq. 
1) return iw1 = n+n+1 diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/cfftf1.f --- a/libcruft/fftpack/cfftf1.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/cfftf1.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,5 +1,4 @@ subroutine cfftf1 (n,c,ch,wa,ifac) - implicit double precision (a-h,o-z) dimension ch(*) ,c(*) ,wa(*) ,ifac(*) nf = ifac(2) na = 0 diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/cffti.f --- a/libcruft/fftpack/cffti.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/cffti.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,5 +1,4 @@ subroutine cffti (n,wsave) - implicit double precision (a-h,o-z) dimension wsave(*) if (n .eq. 1) return iw1 = n+n+1 diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/cffti1.f --- a/libcruft/fftpack/cffti1.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/cffti1.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,5 +1,4 @@ subroutine cffti1 (n,wa,ifac) - implicit double precision (a-h,o-z) dimension wa(*) ,ifac(*) ,ntryh(4) data ntryh(1),ntryh(2),ntryh(3),ntryh(4)/3,4,2,5/ nl = n @@ -26,7 +25,7 @@ 107 if (nl .ne. 1) go to 104 ifac(1) = n ifac(2) = nf - tpi = 6.28318530717959d0 + tpi = 6.28318530717959 argh = tpi/dble(n) i = 2 l1 = 1 diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/passb.f --- a/libcruft/fftpack/passb.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/passb.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,5 +1,4 @@ subroutine passb (nac,ido,ip,l1,idl1,cc,c1,c2,ch,ch2,wa) - implicit double precision (a-h,o-z) dimension ch(ido,l1,ip) ,cc(ido,ip,l1) , 1 c1(ido,l1,ip) ,wa(1) ,c2(idl1,ip), 2 ch2(idl1,ip) diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/passb2.f --- a/libcruft/fftpack/passb2.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/passb2.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,5 +1,4 @@ subroutine passb2 (ido,l1,cc,ch,wa1) - implicit double precision (a-h,o-z) dimension cc(ido,2,l1) ,ch(ido,l1,2) , 1 wa1(1) if (ido .gt. 
2) go to 102 diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/passb3.f --- a/libcruft/fftpack/passb3.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/passb3.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,8 +1,7 @@ subroutine passb3 (ido,l1,cc,ch,wa1,wa2) - implicit double precision (a-h,o-z) dimension cc(ido,3,l1) ,ch(ido,l1,3) , 1 wa1(1) ,wa2(1) - data taur,taui /-.5,.866025403784439d0/ + data taur,taui /-.5,.866025403784439/ if (ido .ne. 2) go to 102 do 101 k=1,l1 tr2 = cc(1,2,k)+cc(1,3,k) diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/passb4.f --- a/libcruft/fftpack/passb4.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/passb4.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,5 +1,4 @@ subroutine passb4 (ido,l1,cc,ch,wa1,wa2,wa3) - implicit double precision (a-h,o-z) dimension cc(ido,4,l1) ,ch(ido,l1,4) , 1 wa1(1) ,wa2(1) ,wa3(1) if (ido .ne. 2) go to 102 diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/passb5.f --- a/libcruft/fftpack/passb5.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/passb5.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,9 +1,8 @@ subroutine passb5 (ido,l1,cc,ch,wa1,wa2,wa3,wa4) - implicit double precision (a-h,o-z) dimension cc(ido,5,l1) ,ch(ido,l1,5) , 1 wa1(1) ,wa2(1) ,wa3(1) ,wa4(1) - data tr11,ti11,tr12,ti12 /.309016994374947d0,.951056516295154d0, - 1-.809016994374947d0,.587785252292473d0/ + data tr11,ti11,tr12,ti12 /.309016994374947,.951056516295154, + 1-.809016994374947,.587785252292473/ if (ido .ne. 
2) go to 102 do 101 k=1,l1 ti5 = cc(2,2,k)-cc(2,5,k) diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/passf.f --- a/libcruft/fftpack/passf.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/passf.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,5 +1,4 @@ subroutine passf (nac,ido,ip,l1,idl1,cc,c1,c2,ch,ch2,wa) - implicit double precision (a-h,o-z) dimension ch(ido,l1,ip) ,cc(ido,ip,l1) , 1 c1(ido,l1,ip) ,wa(1) ,c2(idl1,ip), 2 ch2(idl1,ip) diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/passf2.f --- a/libcruft/fftpack/passf2.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/passf2.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,5 +1,4 @@ subroutine passf2 (ido,l1,cc,ch,wa1) - implicit double precision (a-h,o-z) dimension cc(ido,2,l1) ,ch(ido,l1,2) , 1 wa1(1) if (ido .gt. 2) go to 102 diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/passf3.f --- a/libcruft/fftpack/passf3.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/passf3.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,8 +1,7 @@ subroutine passf3 (ido,l1,cc,ch,wa1,wa2) - implicit double precision (a-h,o-z) dimension cc(ido,3,l1) ,ch(ido,l1,3) , 1 wa1(1) ,wa2(1) - data taur,taui /-.5d0,-.866025403784439d0/ + data taur,taui /-.5,-.866025403784439/ if (ido .ne. 2) go to 102 do 101 k=1,l1 tr2 = cc(1,2,k)+cc(1,3,k) diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/passf4.f --- a/libcruft/fftpack/passf4.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/passf4.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,5 +1,4 @@ subroutine passf4 (ido,l1,cc,ch,wa1,wa2,wa3) - implicit double precision (a-h,o-z) dimension cc(ido,4,l1) ,ch(ido,l1,4) , 1 wa1(1) ,wa2(1) ,wa3(1) if (ido .ne. 
2) go to 102 diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/passf5.f --- a/libcruft/fftpack/passf5.f Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/fftpack/passf5.f Sun Apr 27 22:34:17 2008 +0200 @@ -1,9 +1,8 @@ subroutine passf5 (ido,l1,cc,ch,wa1,wa2,wa3,wa4) - implicit double precision (a-h,o-z) dimension cc(ido,5,l1) ,ch(ido,l1,5) , 1 wa1(1) ,wa2(1) ,wa3(1) ,wa4(1) - data tr11,ti11,tr12,ti12 /.309016994374947d0,-.951056516295154d0, - 1-.809016994374947d0,-.587785252292473d0/ + data tr11,ti11,tr12,ti12 /.309016994374947,-.951056516295154, + 1-.809016994374947,-.587785252292473/ if (ido .ne. 2) go to 102 do 101 k=1,l1 ti5 = cc(2,2,k)-cc(2,5,k) diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zfftb.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zfftb.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,9 @@ + subroutine zfftb (n,c,wsave) + implicit double precision (a-h,o-z) + dimension c(*) ,wsave(*) + if (n .eq. 1) return + iw1 = n+n+1 + iw2 = iw1+n+n + call zfftb1 (n,c,wsave,wsave(iw1),wsave(iw2)) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zfftb1.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zfftb1.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,62 @@ + subroutine zfftb1 (n,c,ch,wa,ifac) + implicit double precision (a-h,o-z) + dimension ch(*) ,c(*) ,wa(*) ,ifac(*) + nf = ifac(2) + na = 0 + l1 = 1 + iw = 1 + do 116 k1=1,nf + ip = ifac(k1+2) + l2 = ip*l1 + ido = n/l2 + idot = ido+ido + idl1 = idot*l1 + if (ip .ne. 4) go to 103 + ix2 = iw+idot + ix3 = ix2+idot + if (na .ne. 0) go to 101 + call zpassb4 (idot,l1,c,ch,wa(iw),wa(ix2),wa(ix3)) + go to 102 + 101 call zpassb4 (idot,l1,ch,c,wa(iw),wa(ix2),wa(ix3)) + 102 na = 1-na + go to 115 + 103 if (ip .ne. 2) go to 106 + if (na .ne. 0) go to 104 + call zpassb2 (idot,l1,c,ch,wa(iw)) + go to 105 + 104 call zpassb2 (idot,l1,ch,c,wa(iw)) + 105 na = 1-na + go to 115 + 106 if (ip .ne. 3) go to 109 + ix2 = iw+idot + if (na .ne. 
0) go to 107 + call zpassb3 (idot,l1,c,ch,wa(iw),wa(ix2)) + go to 108 + 107 call zpassb3 (idot,l1,ch,c,wa(iw),wa(ix2)) + 108 na = 1-na + go to 115 + 109 if (ip .ne. 5) go to 112 + ix2 = iw+idot + ix3 = ix2+idot + ix4 = ix3+idot + if (na .ne. 0) go to 110 + call zpassb5 (idot,l1,c,ch,wa(iw),wa(ix2),wa(ix3),wa(ix4)) + go to 111 + 110 call zpassb5 (idot,l1,ch,c,wa(iw),wa(ix2),wa(ix3),wa(ix4)) + 111 na = 1-na + go to 115 + 112 if (na .ne. 0) go to 113 + call zpassb (nac,idot,ip,l1,idl1,c,c,c,ch,ch,wa(iw)) + go to 114 + 113 call zpassb (nac,idot,ip,l1,idl1,ch,ch,ch,c,c,wa(iw)) + 114 if (nac .ne. 0) na = 1-na + 115 l1 = l2 + iw = iw+(ip-1)*idot + 116 continue + if (na .eq. 0) return + n2 = n+n + do 117 i=1,n2 + c(i) = ch(i) + 117 continue + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zfftf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zfftf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,9 @@ + subroutine zfftf (n,c,wsave) + implicit double precision (a-h,o-z) + dimension c(*) ,wsave(*) + if (n .eq. 1) return + iw1 = n+n+1 + iw2 = iw1+n+n + call zfftf1 (n,c,wsave,wsave(iw1),wsave(iw2)) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zfftf1.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zfftf1.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,62 @@ + subroutine zfftf1 (n,c,ch,wa,ifac) + implicit double precision (a-h,o-z) + dimension ch(*) ,c(*) ,wa(*) ,ifac(*) + nf = ifac(2) + na = 0 + l1 = 1 + iw = 1 + do 116 k1=1,nf + ip = ifac(k1+2) + l2 = ip*l1 + ido = n/l2 + idot = ido+ido + idl1 = idot*l1 + if (ip .ne. 4) go to 103 + ix2 = iw+idot + ix3 = ix2+idot + if (na .ne. 0) go to 101 + call zpassf4 (idot,l1,c,ch,wa(iw),wa(ix2),wa(ix3)) + go to 102 + 101 call zpassf4 (idot,l1,ch,c,wa(iw),wa(ix2),wa(ix3)) + 102 na = 1-na + go to 115 + 103 if (ip .ne. 2) go to 106 + if (na .ne. 
0) go to 104 + call zpassf2 (idot,l1,c,ch,wa(iw)) + go to 105 + 104 call zpassf2 (idot,l1,ch,c,wa(iw)) + 105 na = 1-na + go to 115 + 106 if (ip .ne. 3) go to 109 + ix2 = iw+idot + if (na .ne. 0) go to 107 + call zpassf3 (idot,l1,c,ch,wa(iw),wa(ix2)) + go to 108 + 107 call zpassf3 (idot,l1,ch,c,wa(iw),wa(ix2)) + 108 na = 1-na + go to 115 + 109 if (ip .ne. 5) go to 112 + ix2 = iw+idot + ix3 = ix2+idot + ix4 = ix3+idot + if (na .ne. 0) go to 110 + call zpassf5 (idot,l1,c,ch,wa(iw),wa(ix2),wa(ix3),wa(ix4)) + go to 111 + 110 call zpassf5 (idot,l1,ch,c,wa(iw),wa(ix2),wa(ix3),wa(ix4)) + 111 na = 1-na + go to 115 + 112 if (na .ne. 0) go to 113 + call zpassf (nac,idot,ip,l1,idl1,c,c,c,ch,ch,wa(iw)) + go to 114 + 113 call zpassf (nac,idot,ip,l1,idl1,ch,ch,ch,c,c,wa(iw)) + 114 if (nac .ne. 0) na = 1-na + 115 l1 = l2 + iw = iw+(ip-1)*idot + 116 continue + if (na .eq. 0) return + n2 = n+n + do 117 i=1,n2 + c(i) = ch(i) + 117 continue + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zffti.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zffti.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,9 @@ + subroutine zffti (n,wsave) + implicit double precision (a-h,o-z) + dimension wsave(*) + if (n .eq. 1) return + iw1 = n+n+1 + iw2 = iw1+n+n + call zffti1 (n,wsave(iw1),wsave(iw2)) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zffti1.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zffti1.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,61 @@ + subroutine zffti1 (n,wa,ifac) + implicit double precision (a-h,o-z) + dimension wa(*) ,ifac(*) ,ntryh(4) + data ntryh(1),ntryh(2),ntryh(3),ntryh(4)/3,4,2,5/ + nl = n + nf = 0 + j = 0 + 101 j = j+1 + if (j-4) 102,102,103 + 102 ntry = ntryh(j) + go to 104 + 103 ntry = ntry+2 + 104 nq = nl/ntry + nr = nl-ntry*nq + if (nr) 101,105,101 + 105 nf = nf+1 + ifac(nf+2) = ntry + nl = nq + if (ntry .ne. 2) go to 107 + if (nf .eq. 
1) go to 107 + do 106 i=2,nf + ib = nf-i+2 + ifac(ib+2) = ifac(ib+1) + 106 continue + ifac(3) = 2 + 107 if (nl .ne. 1) go to 104 + ifac(1) = n + ifac(2) = nf + tpi = 6.28318530717959d0 + argh = tpi/dble(n) + i = 2 + l1 = 1 + do 110 k1=1,nf + ip = ifac(k1+2) + ld = 0 + l2 = l1*ip + ido = n/l2 + idot = ido+ido+2 + ipm = ip-1 + do 109 j=1,ipm + i1 = i + wa(i-1) = 1. + wa(i) = 0. + ld = ld+l1 + fi = 0. + argld = dble(ld)*argh + do 108 ii=4,idot,2 + i = i+2 + fi = fi+1. + arg = fi*argld + wa(i-1) = cos(arg) + wa(i) = sin(arg) + 108 continue + if (ip .le. 5) go to 109 + wa(i1-1) = wa(i-1) + wa(i1) = wa(i) + 109 continue + l1 = l2 + 110 continue + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zpassb.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zpassb.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,117 @@ + subroutine zpassb (nac,ido,ip,l1,idl1,cc,c1,c2,ch,ch2,wa) + implicit double precision (a-h,o-z) + dimension ch(ido,l1,ip) ,cc(ido,ip,l1) , + 1 c1(ido,l1,ip) ,wa(1) ,c2(idl1,ip), + 2 ch2(idl1,ip) + idot = ido/2 + nt = ip*idl1 + ipp2 = ip+2 + ipph = (ip+1)/2 + idp = ip*ido +c + if (ido .lt. l1) go to 106 + do 103 j=2,ipph + jc = ipp2-j + do 102 k=1,l1 + do 101 i=1,ido + ch(i,k,j) = cc(i,j,k)+cc(i,jc,k) + ch(i,k,jc) = cc(i,j,k)-cc(i,jc,k) + 101 continue + 102 continue + 103 continue + do 105 k=1,l1 + do 104 i=1,ido + ch(i,k,1) = cc(i,1,k) + 104 continue + 105 continue + go to 112 + 106 do 109 j=2,ipph + jc = ipp2-j + do 108 i=1,ido + do 107 k=1,l1 + ch(i,k,j) = cc(i,j,k)+cc(i,jc,k) + ch(i,k,jc) = cc(i,j,k)-cc(i,jc,k) + 107 continue + 108 continue + 109 continue + do 111 i=1,ido + do 110 k=1,l1 + ch(i,k,1) = cc(i,1,k) + 110 continue + 111 continue + 112 idl = 2-ido + inc = 0 + do 116 l=2,ipph + lc = ipp2-l + idl = idl+ido + do 113 ik=1,idl1 + c2(ik,l) = ch2(ik,1)+wa(idl-1)*ch2(ik,2) + c2(ik,lc) = wa(idl)*ch2(ik,ip) + 113 continue + idlj = idl + inc = inc+ido + do 115 j=3,ipph + jc = ipp2-j + idlj = idlj+inc + if (idlj .gt. 
idp) idlj = idlj-idp + war = wa(idlj-1) + wai = wa(idlj) + do 114 ik=1,idl1 + c2(ik,l) = c2(ik,l)+war*ch2(ik,j) + c2(ik,lc) = c2(ik,lc)+wai*ch2(ik,jc) + 114 continue + 115 continue + 116 continue + do 118 j=2,ipph + do 117 ik=1,idl1 + ch2(ik,1) = ch2(ik,1)+ch2(ik,j) + 117 continue + 118 continue + do 120 j=2,ipph + jc = ipp2-j + do 119 ik=2,idl1,2 + ch2(ik-1,j) = c2(ik-1,j)-c2(ik,jc) + ch2(ik-1,jc) = c2(ik-1,j)+c2(ik,jc) + ch2(ik,j) = c2(ik,j)+c2(ik-1,jc) + ch2(ik,jc) = c2(ik,j)-c2(ik-1,jc) + 119 continue + 120 continue + nac = 1 + if (ido .eq. 2) return + nac = 0 + do 121 ik=1,idl1 + c2(ik,1) = ch2(ik,1) + 121 continue + do 123 j=2,ip + do 122 k=1,l1 + c1(1,k,j) = ch(1,k,j) + c1(2,k,j) = ch(2,k,j) + 122 continue + 123 continue + if (idot .gt. l1) go to 127 + idij = 0 + do 126 j=2,ip + idij = idij+2 + do 125 i=4,ido,2 + idij = idij+2 + do 124 k=1,l1 + c1(i-1,k,j) = wa(idij-1)*ch(i-1,k,j)-wa(idij)*ch(i,k,j) + c1(i,k,j) = wa(idij-1)*ch(i,k,j)+wa(idij)*ch(i-1,k,j) + 124 continue + 125 continue + 126 continue + return + 127 idj = 2-ido + do 130 j=2,ip + idj = idj+ido + do 129 k=1,l1 + idij = idj + do 128 i=4,ido,2 + idij = idij+2 + c1(i-1,k,j) = wa(idij-1)*ch(i-1,k,j)-wa(idij)*ch(i,k,j) + c1(i,k,j) = wa(idij-1)*ch(i,k,j)+wa(idij)*ch(i-1,k,j) + 128 continue + 129 continue + 130 continue + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zpassb2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zpassb2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,24 @@ + subroutine zpassb2 (ido,l1,cc,ch,wa1) + implicit double precision (a-h,o-z) + dimension cc(ido,2,l1) ,ch(ido,l1,2) , + 1 wa1(1) + if (ido .gt. 
2) go to 102 + do 101 k=1,l1 + ch(1,k,1) = cc(1,1,k)+cc(1,2,k) + ch(1,k,2) = cc(1,1,k)-cc(1,2,k) + ch(2,k,1) = cc(2,1,k)+cc(2,2,k) + ch(2,k,2) = cc(2,1,k)-cc(2,2,k) + 101 continue + return + 102 do 104 k=1,l1 + do 103 i=2,ido,2 + ch(i-1,k,1) = cc(i-1,1,k)+cc(i-1,2,k) + tr2 = cc(i-1,1,k)-cc(i-1,2,k) + ch(i,k,1) = cc(i,1,k)+cc(i,2,k) + ti2 = cc(i,1,k)-cc(i,2,k) + ch(i,k,2) = wa1(i-1)*ti2+wa1(i)*tr2 + ch(i-1,k,2) = wa1(i-1)*tr2-wa1(i)*ti2 + 103 continue + 104 continue + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zpassb3.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zpassb3.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,43 @@ + subroutine zpassb3 (ido,l1,cc,ch,wa1,wa2) + implicit double precision (a-h,o-z) + dimension cc(ido,3,l1) ,ch(ido,l1,3) , + 1 wa1(1) ,wa2(1) + data taur,taui /-.5,.866025403784439d0/ + if (ido .ne. 2) go to 102 + do 101 k=1,l1 + tr2 = cc(1,2,k)+cc(1,3,k) + cr2 = cc(1,1,k)+taur*tr2 + ch(1,k,1) = cc(1,1,k)+tr2 + ti2 = cc(2,2,k)+cc(2,3,k) + ci2 = cc(2,1,k)+taur*ti2 + ch(2,k,1) = cc(2,1,k)+ti2 + cr3 = taui*(cc(1,2,k)-cc(1,3,k)) + ci3 = taui*(cc(2,2,k)-cc(2,3,k)) + ch(1,k,2) = cr2-ci3 + ch(1,k,3) = cr2+ci3 + ch(2,k,2) = ci2+cr3 + ch(2,k,3) = ci2-cr3 + 101 continue + return + 102 do 104 k=1,l1 + do 103 i=2,ido,2 + tr2 = cc(i-1,2,k)+cc(i-1,3,k) + cr2 = cc(i-1,1,k)+taur*tr2 + ch(i-1,k,1) = cc(i-1,1,k)+tr2 + ti2 = cc(i,2,k)+cc(i,3,k) + ci2 = cc(i,1,k)+taur*ti2 + ch(i,k,1) = cc(i,1,k)+ti2 + cr3 = taui*(cc(i-1,2,k)-cc(i-1,3,k)) + ci3 = taui*(cc(i,2,k)-cc(i,3,k)) + dr2 = cr2-ci3 + dr3 = cr2+ci3 + di2 = ci2+cr3 + di3 = ci2-cr3 + ch(i,k,2) = wa1(i-1)*di2+wa1(i)*dr2 + ch(i-1,k,2) = wa1(i-1)*dr2-wa1(i)*di2 + ch(i,k,3) = wa2(i-1)*di3+wa2(i)*dr3 + ch(i-1,k,3) = wa2(i-1)*dr3-wa2(i)*di3 + 103 continue + 104 continue + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zpassb4.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zpassb4.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,52 @@ + 
subroutine zpassb4 (ido,l1,cc,ch,wa1,wa2,wa3) + implicit double precision (a-h,o-z) + dimension cc(ido,4,l1) ,ch(ido,l1,4) , + 1 wa1(1) ,wa2(1) ,wa3(1) + if (ido .ne. 2) go to 102 + do 101 k=1,l1 + ti1 = cc(2,1,k)-cc(2,3,k) + ti2 = cc(2,1,k)+cc(2,3,k) + tr4 = cc(2,4,k)-cc(2,2,k) + ti3 = cc(2,2,k)+cc(2,4,k) + tr1 = cc(1,1,k)-cc(1,3,k) + tr2 = cc(1,1,k)+cc(1,3,k) + ti4 = cc(1,2,k)-cc(1,4,k) + tr3 = cc(1,2,k)+cc(1,4,k) + ch(1,k,1) = tr2+tr3 + ch(1,k,3) = tr2-tr3 + ch(2,k,1) = ti2+ti3 + ch(2,k,3) = ti2-ti3 + ch(1,k,2) = tr1+tr4 + ch(1,k,4) = tr1-tr4 + ch(2,k,2) = ti1+ti4 + ch(2,k,4) = ti1-ti4 + 101 continue + return + 102 do 104 k=1,l1 + do 103 i=2,ido,2 + ti1 = cc(i,1,k)-cc(i,3,k) + ti2 = cc(i,1,k)+cc(i,3,k) + ti3 = cc(i,2,k)+cc(i,4,k) + tr4 = cc(i,4,k)-cc(i,2,k) + tr1 = cc(i-1,1,k)-cc(i-1,3,k) + tr2 = cc(i-1,1,k)+cc(i-1,3,k) + ti4 = cc(i-1,2,k)-cc(i-1,4,k) + tr3 = cc(i-1,2,k)+cc(i-1,4,k) + ch(i-1,k,1) = tr2+tr3 + cr3 = tr2-tr3 + ch(i,k,1) = ti2+ti3 + ci3 = ti2-ti3 + cr2 = tr1+tr4 + cr4 = tr1-tr4 + ci2 = ti1+ti4 + ci4 = ti1-ti4 + ch(i-1,k,2) = wa1(i-1)*cr2-wa1(i)*ci2 + ch(i,k,2) = wa1(i-1)*ci2+wa1(i)*cr2 + ch(i-1,k,3) = wa2(i-1)*cr3-wa2(i)*ci3 + ch(i,k,3) = wa2(i-1)*ci3+wa2(i)*cr3 + ch(i-1,k,4) = wa3(i-1)*cr4-wa3(i)*ci4 + ch(i,k,4) = wa3(i-1)*ci4+wa3(i)*cr4 + 103 continue + 104 continue + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zpassb5.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zpassb5.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,76 @@ + subroutine zpassb5 (ido,l1,cc,ch,wa1,wa2,wa3,wa4) + implicit double precision (a-h,o-z) + dimension cc(ido,5,l1) ,ch(ido,l1,5) , + 1 wa1(1) ,wa2(1) ,wa3(1) ,wa4(1) + data tr11,ti11,tr12,ti12 /.309016994374947d0,.951056516295154d0, + 1-.809016994374947d0,.587785252292473d0/ + if (ido .ne. 
2) go to 102 + do 101 k=1,l1 + ti5 = cc(2,2,k)-cc(2,5,k) + ti2 = cc(2,2,k)+cc(2,5,k) + ti4 = cc(2,3,k)-cc(2,4,k) + ti3 = cc(2,3,k)+cc(2,4,k) + tr5 = cc(1,2,k)-cc(1,5,k) + tr2 = cc(1,2,k)+cc(1,5,k) + tr4 = cc(1,3,k)-cc(1,4,k) + tr3 = cc(1,3,k)+cc(1,4,k) + ch(1,k,1) = cc(1,1,k)+tr2+tr3 + ch(2,k,1) = cc(2,1,k)+ti2+ti3 + cr2 = cc(1,1,k)+tr11*tr2+tr12*tr3 + ci2 = cc(2,1,k)+tr11*ti2+tr12*ti3 + cr3 = cc(1,1,k)+tr12*tr2+tr11*tr3 + ci3 = cc(2,1,k)+tr12*ti2+tr11*ti3 + cr5 = ti11*tr5+ti12*tr4 + ci5 = ti11*ti5+ti12*ti4 + cr4 = ti12*tr5-ti11*tr4 + ci4 = ti12*ti5-ti11*ti4 + ch(1,k,2) = cr2-ci5 + ch(1,k,5) = cr2+ci5 + ch(2,k,2) = ci2+cr5 + ch(2,k,3) = ci3+cr4 + ch(1,k,3) = cr3-ci4 + ch(1,k,4) = cr3+ci4 + ch(2,k,4) = ci3-cr4 + ch(2,k,5) = ci2-cr5 + 101 continue + return + 102 do 104 k=1,l1 + do 103 i=2,ido,2 + ti5 = cc(i,2,k)-cc(i,5,k) + ti2 = cc(i,2,k)+cc(i,5,k) + ti4 = cc(i,3,k)-cc(i,4,k) + ti3 = cc(i,3,k)+cc(i,4,k) + tr5 = cc(i-1,2,k)-cc(i-1,5,k) + tr2 = cc(i-1,2,k)+cc(i-1,5,k) + tr4 = cc(i-1,3,k)-cc(i-1,4,k) + tr3 = cc(i-1,3,k)+cc(i-1,4,k) + ch(i-1,k,1) = cc(i-1,1,k)+tr2+tr3 + ch(i,k,1) = cc(i,1,k)+ti2+ti3 + cr2 = cc(i-1,1,k)+tr11*tr2+tr12*tr3 + ci2 = cc(i,1,k)+tr11*ti2+tr12*ti3 + cr3 = cc(i-1,1,k)+tr12*tr2+tr11*tr3 + ci3 = cc(i,1,k)+tr12*ti2+tr11*ti3 + cr5 = ti11*tr5+ti12*tr4 + ci5 = ti11*ti5+ti12*ti4 + cr4 = ti12*tr5-ti11*tr4 + ci4 = ti12*ti5-ti11*ti4 + dr3 = cr3-ci4 + dr4 = cr3+ci4 + di3 = ci3+cr4 + di4 = ci3-cr4 + dr5 = cr2+ci5 + dr2 = cr2-ci5 + di5 = ci2-cr5 + di2 = ci2+cr5 + ch(i-1,k,2) = wa1(i-1)*dr2-wa1(i)*di2 + ch(i,k,2) = wa1(i-1)*di2+wa1(i)*dr2 + ch(i-1,k,3) = wa2(i-1)*dr3-wa2(i)*di3 + ch(i,k,3) = wa2(i-1)*di3+wa2(i)*dr3 + ch(i-1,k,4) = wa3(i-1)*dr4-wa3(i)*di4 + ch(i,k,4) = wa3(i-1)*di4+wa3(i)*dr4 + ch(i-1,k,5) = wa4(i-1)*dr5-wa4(i)*di5 + ch(i,k,5) = wa4(i-1)*di5+wa4(i)*dr5 + 103 continue + 104 continue + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zpassf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zpassf.f Sun Apr 27 
22:34:17 2008 +0200 @@ -0,0 +1,117 @@ + subroutine zpassf (nac,ido,ip,l1,idl1,cc,c1,c2,ch,ch2,wa) + implicit double precision (a-h,o-z) + dimension ch(ido,l1,ip) ,cc(ido,ip,l1) , + 1 c1(ido,l1,ip) ,wa(1) ,c2(idl1,ip), + 2 ch2(idl1,ip) + idot = ido/2 + nt = ip*idl1 + ipp2 = ip+2 + ipph = (ip+1)/2 + idp = ip*ido +c + if (ido .lt. l1) go to 106 + do 103 j=2,ipph + jc = ipp2-j + do 102 k=1,l1 + do 101 i=1,ido + ch(i,k,j) = cc(i,j,k)+cc(i,jc,k) + ch(i,k,jc) = cc(i,j,k)-cc(i,jc,k) + 101 continue + 102 continue + 103 continue + do 105 k=1,l1 + do 104 i=1,ido + ch(i,k,1) = cc(i,1,k) + 104 continue + 105 continue + go to 112 + 106 do 109 j=2,ipph + jc = ipp2-j + do 108 i=1,ido + do 107 k=1,l1 + ch(i,k,j) = cc(i,j,k)+cc(i,jc,k) + ch(i,k,jc) = cc(i,j,k)-cc(i,jc,k) + 107 continue + 108 continue + 109 continue + do 111 i=1,ido + do 110 k=1,l1 + ch(i,k,1) = cc(i,1,k) + 110 continue + 111 continue + 112 idl = 2-ido + inc = 0 + do 116 l=2,ipph + lc = ipp2-l + idl = idl+ido + do 113 ik=1,idl1 + c2(ik,l) = ch2(ik,1)+wa(idl-1)*ch2(ik,2) + c2(ik,lc) = -wa(idl)*ch2(ik,ip) + 113 continue + idlj = idl + inc = inc+ido + do 115 j=3,ipph + jc = ipp2-j + idlj = idlj+inc + if (idlj .gt. idp) idlj = idlj-idp + war = wa(idlj-1) + wai = wa(idlj) + do 114 ik=1,idl1 + c2(ik,l) = c2(ik,l)+war*ch2(ik,j) + c2(ik,lc) = c2(ik,lc)-wai*ch2(ik,jc) + 114 continue + 115 continue + 116 continue + do 118 j=2,ipph + do 117 ik=1,idl1 + ch2(ik,1) = ch2(ik,1)+ch2(ik,j) + 117 continue + 118 continue + do 120 j=2,ipph + jc = ipp2-j + do 119 ik=2,idl1,2 + ch2(ik-1,j) = c2(ik-1,j)-c2(ik,jc) + ch2(ik-1,jc) = c2(ik-1,j)+c2(ik,jc) + ch2(ik,j) = c2(ik,j)+c2(ik-1,jc) + ch2(ik,jc) = c2(ik,j)-c2(ik-1,jc) + 119 continue + 120 continue + nac = 1 + if (ido .eq. 2) return + nac = 0 + do 121 ik=1,idl1 + c2(ik,1) = ch2(ik,1) + 121 continue + do 123 j=2,ip + do 122 k=1,l1 + c1(1,k,j) = ch(1,k,j) + c1(2,k,j) = ch(2,k,j) + 122 continue + 123 continue + if (idot .gt. 
l1) go to 127 + idij = 0 + do 126 j=2,ip + idij = idij+2 + do 125 i=4,ido,2 + idij = idij+2 + do 124 k=1,l1 + c1(i-1,k,j) = wa(idij-1)*ch(i-1,k,j)+wa(idij)*ch(i,k,j) + c1(i,k,j) = wa(idij-1)*ch(i,k,j)-wa(idij)*ch(i-1,k,j) + 124 continue + 125 continue + 126 continue + return + 127 idj = 2-ido + do 130 j=2,ip + idj = idj+ido + do 129 k=1,l1 + idij = idj + do 128 i=4,ido,2 + idij = idij+2 + c1(i-1,k,j) = wa(idij-1)*ch(i-1,k,j)+wa(idij)*ch(i,k,j) + c1(i,k,j) = wa(idij-1)*ch(i,k,j)-wa(idij)*ch(i-1,k,j) + 128 continue + 129 continue + 130 continue + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zpassf2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zpassf2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,24 @@ + subroutine zpassf2 (ido,l1,cc,ch,wa1) + implicit double precision (a-h,o-z) + dimension cc(ido,2,l1) ,ch(ido,l1,2) , + 1 wa1(1) + if (ido .gt. 2) go to 102 + do 101 k=1,l1 + ch(1,k,1) = cc(1,1,k)+cc(1,2,k) + ch(1,k,2) = cc(1,1,k)-cc(1,2,k) + ch(2,k,1) = cc(2,1,k)+cc(2,2,k) + ch(2,k,2) = cc(2,1,k)-cc(2,2,k) + 101 continue + return + 102 do 104 k=1,l1 + do 103 i=2,ido,2 + ch(i-1,k,1) = cc(i-1,1,k)+cc(i-1,2,k) + tr2 = cc(i-1,1,k)-cc(i-1,2,k) + ch(i,k,1) = cc(i,1,k)+cc(i,2,k) + ti2 = cc(i,1,k)-cc(i,2,k) + ch(i,k,2) = wa1(i-1)*ti2-wa1(i)*tr2 + ch(i-1,k,2) = wa1(i-1)*tr2+wa1(i)*ti2 + 103 continue + 104 continue + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zpassf3.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zpassf3.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,43 @@ + subroutine zpassf3 (ido,l1,cc,ch,wa1,wa2) + implicit double precision (a-h,o-z) + dimension cc(ido,3,l1) ,ch(ido,l1,3) , + 1 wa1(1) ,wa2(1) + data taur,taui /-.5d0,-.866025403784439d0/ + if (ido .ne. 
2) go to 102 + do 101 k=1,l1 + tr2 = cc(1,2,k)+cc(1,3,k) + cr2 = cc(1,1,k)+taur*tr2 + ch(1,k,1) = cc(1,1,k)+tr2 + ti2 = cc(2,2,k)+cc(2,3,k) + ci2 = cc(2,1,k)+taur*ti2 + ch(2,k,1) = cc(2,1,k)+ti2 + cr3 = taui*(cc(1,2,k)-cc(1,3,k)) + ci3 = taui*(cc(2,2,k)-cc(2,3,k)) + ch(1,k,2) = cr2-ci3 + ch(1,k,3) = cr2+ci3 + ch(2,k,2) = ci2+cr3 + ch(2,k,3) = ci2-cr3 + 101 continue + return + 102 do 104 k=1,l1 + do 103 i=2,ido,2 + tr2 = cc(i-1,2,k)+cc(i-1,3,k) + cr2 = cc(i-1,1,k)+taur*tr2 + ch(i-1,k,1) = cc(i-1,1,k)+tr2 + ti2 = cc(i,2,k)+cc(i,3,k) + ci2 = cc(i,1,k)+taur*ti2 + ch(i,k,1) = cc(i,1,k)+ti2 + cr3 = taui*(cc(i-1,2,k)-cc(i-1,3,k)) + ci3 = taui*(cc(i,2,k)-cc(i,3,k)) + dr2 = cr2-ci3 + dr3 = cr2+ci3 + di2 = ci2+cr3 + di3 = ci2-cr3 + ch(i,k,2) = wa1(i-1)*di2-wa1(i)*dr2 + ch(i-1,k,2) = wa1(i-1)*dr2+wa1(i)*di2 + ch(i,k,3) = wa2(i-1)*di3-wa2(i)*dr3 + ch(i-1,k,3) = wa2(i-1)*dr3+wa2(i)*di3 + 103 continue + 104 continue + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zpassf4.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zpassf4.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,52 @@ + subroutine zpassf4 (ido,l1,cc,ch,wa1,wa2,wa3) + implicit double precision (a-h,o-z) + dimension cc(ido,4,l1) ,ch(ido,l1,4) , + 1 wa1(1) ,wa2(1) ,wa3(1) + if (ido .ne. 
2) go to 102 + do 101 k=1,l1 + ti1 = cc(2,1,k)-cc(2,3,k) + ti2 = cc(2,1,k)+cc(2,3,k) + tr4 = cc(2,2,k)-cc(2,4,k) + ti3 = cc(2,2,k)+cc(2,4,k) + tr1 = cc(1,1,k)-cc(1,3,k) + tr2 = cc(1,1,k)+cc(1,3,k) + ti4 = cc(1,4,k)-cc(1,2,k) + tr3 = cc(1,2,k)+cc(1,4,k) + ch(1,k,1) = tr2+tr3 + ch(1,k,3) = tr2-tr3 + ch(2,k,1) = ti2+ti3 + ch(2,k,3) = ti2-ti3 + ch(1,k,2) = tr1+tr4 + ch(1,k,4) = tr1-tr4 + ch(2,k,2) = ti1+ti4 + ch(2,k,4) = ti1-ti4 + 101 continue + return + 102 do 104 k=1,l1 + do 103 i=2,ido,2 + ti1 = cc(i,1,k)-cc(i,3,k) + ti2 = cc(i,1,k)+cc(i,3,k) + ti3 = cc(i,2,k)+cc(i,4,k) + tr4 = cc(i,2,k)-cc(i,4,k) + tr1 = cc(i-1,1,k)-cc(i-1,3,k) + tr2 = cc(i-1,1,k)+cc(i-1,3,k) + ti4 = cc(i-1,4,k)-cc(i-1,2,k) + tr3 = cc(i-1,2,k)+cc(i-1,4,k) + ch(i-1,k,1) = tr2+tr3 + cr3 = tr2-tr3 + ch(i,k,1) = ti2+ti3 + ci3 = ti2-ti3 + cr2 = tr1+tr4 + cr4 = tr1-tr4 + ci2 = ti1+ti4 + ci4 = ti1-ti4 + ch(i-1,k,2) = wa1(i-1)*cr2+wa1(i)*ci2 + ch(i,k,2) = wa1(i-1)*ci2-wa1(i)*cr2 + ch(i-1,k,3) = wa2(i-1)*cr3+wa2(i)*ci3 + ch(i,k,3) = wa2(i-1)*ci3-wa2(i)*cr3 + ch(i-1,k,4) = wa3(i-1)*cr4+wa3(i)*ci4 + ch(i,k,4) = wa3(i-1)*ci4-wa3(i)*cr4 + 103 continue + 104 continue + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/fftpack/zpassf5.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/fftpack/zpassf5.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,76 @@ + subroutine zpassf5 (ido,l1,cc,ch,wa1,wa2,wa3,wa4) + implicit double precision (a-h,o-z) + dimension cc(ido,5,l1) ,ch(ido,l1,5) , + 1 wa1(1) ,wa2(1) ,wa3(1) ,wa4(1) + data tr11,ti11,tr12,ti12 /.309016994374947d0,-.951056516295154d0, + 1-.809016994374947d0,-.587785252292473d0/ + if (ido .ne. 
2) go to 102 + do 101 k=1,l1 + ti5 = cc(2,2,k)-cc(2,5,k) + ti2 = cc(2,2,k)+cc(2,5,k) + ti4 = cc(2,3,k)-cc(2,4,k) + ti3 = cc(2,3,k)+cc(2,4,k) + tr5 = cc(1,2,k)-cc(1,5,k) + tr2 = cc(1,2,k)+cc(1,5,k) + tr4 = cc(1,3,k)-cc(1,4,k) + tr3 = cc(1,3,k)+cc(1,4,k) + ch(1,k,1) = cc(1,1,k)+tr2+tr3 + ch(2,k,1) = cc(2,1,k)+ti2+ti3 + cr2 = cc(1,1,k)+tr11*tr2+tr12*tr3 + ci2 = cc(2,1,k)+tr11*ti2+tr12*ti3 + cr3 = cc(1,1,k)+tr12*tr2+tr11*tr3 + ci3 = cc(2,1,k)+tr12*ti2+tr11*ti3 + cr5 = ti11*tr5+ti12*tr4 + ci5 = ti11*ti5+ti12*ti4 + cr4 = ti12*tr5-ti11*tr4 + ci4 = ti12*ti5-ti11*ti4 + ch(1,k,2) = cr2-ci5 + ch(1,k,5) = cr2+ci5 + ch(2,k,2) = ci2+cr5 + ch(2,k,3) = ci3+cr4 + ch(1,k,3) = cr3-ci4 + ch(1,k,4) = cr3+ci4 + ch(2,k,4) = ci3-cr4 + ch(2,k,5) = ci2-cr5 + 101 continue + return + 102 do 104 k=1,l1 + do 103 i=2,ido,2 + ti5 = cc(i,2,k)-cc(i,5,k) + ti2 = cc(i,2,k)+cc(i,5,k) + ti4 = cc(i,3,k)-cc(i,4,k) + ti3 = cc(i,3,k)+cc(i,4,k) + tr5 = cc(i-1,2,k)-cc(i-1,5,k) + tr2 = cc(i-1,2,k)+cc(i-1,5,k) + tr4 = cc(i-1,3,k)-cc(i-1,4,k) + tr3 = cc(i-1,3,k)+cc(i-1,4,k) + ch(i-1,k,1) = cc(i-1,1,k)+tr2+tr3 + ch(i,k,1) = cc(i,1,k)+ti2+ti3 + cr2 = cc(i-1,1,k)+tr11*tr2+tr12*tr3 + ci2 = cc(i,1,k)+tr11*ti2+tr12*ti3 + cr3 = cc(i-1,1,k)+tr12*tr2+tr11*tr3 + ci3 = cc(i,1,k)+tr12*ti2+tr11*ti3 + cr5 = ti11*tr5+ti12*tr4 + ci5 = ti11*ti5+ti12*ti4 + cr4 = ti12*tr5-ti11*tr4 + ci4 = ti12*ti5-ti11*ti4 + dr3 = cr3-ci4 + dr4 = cr3+ci4 + di3 = ci3+cr4 + di4 = ci3-cr4 + dr5 = cr2+ci5 + dr2 = cr2-ci5 + di5 = ci2-cr5 + di2 = ci2+cr5 + ch(i-1,k,2) = wa1(i-1)*dr2+wa1(i)*di2 + ch(i,k,2) = wa1(i-1)*di2-wa1(i)*dr2 + ch(i-1,k,3) = wa2(i-1)*dr3+wa2(i)*di3 + ch(i,k,3) = wa2(i-1)*di3-wa2(i)*dr3 + ch(i-1,k,4) = wa3(i-1)*dr4+wa3(i)*di4 + ch(i,k,4) = wa3(i-1)*di4-wa3(i)*dr4 + ch(i-1,k,5) = wa4(i-1)*dr5+wa4(i)*di5 + ch(i,k,5) = wa4(i-1)*di5-wa4(i)*dr5 + 103 continue + 104 continue + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack-xtra/Makefile.in --- a/libcruft/lapack-xtra/Makefile.in Wed May 14 18:09:56 2008 +0200 +++ 
b/libcruft/lapack-xtra/Makefile.in Sun Apr 27 22:34:17 2008 +0200 @@ -26,7 +26,7 @@ EXTERNAL_DISTFILES = $(DISTFILES) -FSRC = xdlamch.f xdlange.f xilaenv.f xzlange.f +FSRC = xclange.f xdlamch.f xdlange.f xilaenv.f xslamch.f xslange.f xzlange.f include $(TOPDIR)/Makeconf diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack-xtra/xclange.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack-xtra/xclange.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,155 @@ +*** This subroutine includes all of the CLANGE function instead of +*** simply wrapping it in a subroutine to avoid possible differences in +*** the way complex values are returned by various Fortran compilers. +*** For example, if we simply wrap the function and compile this file +*** with gfortran and the library that provides CLANGE is compiled with +*** a compiler that uses the g77 (f2c-compatible) calling convention for +*** complex-valued functions, all hell will break loose. + + SUBROUTINE XCLANGE ( NORM, M, N, A, LDA, WORK, VALUE ) + +*** DOUBLE PRECISION FUNCTION CLANGE( NORM, M, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLANGE returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex matrix A. 
+* +* Description +* =========== +* +* CLANGE returns the value +* +* CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANGE as described +* above. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. When M = 0, +* CLANGE is set to zero. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. When N = 0, +* CLANGE is set to zero. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The m by n matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +* where LWORK >= M when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). 
+* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, M + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, M + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, M + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL CLASSQ( M, A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* +*** CLANGE = VALUE + RETURN +* +* End of CLANGE +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack-xtra/xslamch.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack-xtra/xslamch.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,6 @@ + subroutine xslamch (cmach, retval) + character cmach + real retval, slamch + retval = slamch (cmach) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack-xtra/xslange.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack-xtra/xslange.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,7 @@ + subroutine xslange (norm, m, n, a, lda, work, retval) + character norm + integer lda, m, n + real a (lda, *), work (*), slange, retval + retval = slange (norm, m, n, a, lda, work) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/Makefile.in --- a/libcruft/lapack/Makefile.in Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/lapack/Makefile.in Sun Apr 27 22:34:17 2008 +0200 @@ -26,7 +26,25 @@ EXTERNAL_DISTFILES = $(DISTFILES) -FSRC = dbdsqr.f dgbcon.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f \ +FSRC = cbdsqr.f csrscl.f cgbcon.f cgbtf2.f cgbtrf.f cgbtrs.f \ + cgebak.f cgebal.f cgebd2.f cgebrd.f cgecon.f cgeesx.f cgeev.f \ + cgehd2.f cgehrd.f cgelq2.f cgelqf.f cgelsd.f cgelss.f cgelsy.f \ + cgeqp3.f cgeqpf.f cgeqr2.f 
cgeqrf.f cgesv.f cgesvd.f cgetf2.f \ + cgetrf.f cgetri.f cgetrs.f cggbal.f cgtsv.f cgttrf.f cgttrs.f \ + cgtts2.f cheev.f chetd2.f chetrd.f chseqr.f clabrd.f clacgv.f \ + clacn2.f clacon.f clacpy.f cladiv.f clahqr.f clahr2.f clahrd.f \ + claic1.f clals0.f clalsa.f clalsd.f clange.f clanhe.f clanhs.f \ + clantr.f claqp2.f claqps.f claqr0.f claqr1.f claqr2.f claqr3.f \ + claqr4.f claqr5.f clarf.f clarfb.f clarfg.f clarft.f clarfx.f \ + clartg.f clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f \ + classq.f claswp.f clatbs.f clatrd.f clatrs.f clatrz.f clauu2.f \ + clauum.f cpbcon.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpotf2.f \ + cpotrf.f cpotri.f cpotrs.f cptsv.f cpttrf.f cpttrs.f cptts2.f crot.f \ + csteqr.f ctrcon.f ctrevc.f ctrexc.f ctrsen.f ctrsyl.f ctrti2.f \ + ctrtri.f ctrtrs.f ctzrzf.f cung2l.f cung2r.f cungbr.f cunghr.f \ + cungl2.f cunglq.f cungql.f cungqr.f cungtr.f cunm2r.f cunmbr.f \ + cunml2.f cunmlq.f cunmqr.f cunmr3.f cunmrz.f \ + dbdsqr.f dgbcon.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f \ dgebd2.f dgebrd.f dgecon.f dgeesx.f dgeev.f dgehd2.f dgehrd.f \ dgelq2.f dgelqf.f dgelsd.f dgelss.f dgelsy.f dgeqp3.f dgeqpf.f \ dgeqr2.f dgeqrf.f dgesv.f dgesvd.f dgetf2.f dgetrf.f dgetri.f \ @@ -50,8 +68,32 @@ dpotri.f dpotrs.f dptsv.f dpttrf.f dpttrs.f dptts2.f drscl.f \ dsteqr.f dsterf.f dsyev.f dsytd2.f dsytrd.f dtgevc.f dtrcon.f \ dtrevc.f dtrexc.f dtrsen.f dtrsyl.f dtrti2.f dtrtri.f dtrtrs.f \ - dtzrzf.f dzsum1.f ieeeck.f ilaenv.f iparmq.f izmax1.f spotf2.f \ - spotrf.f zbdsqr.f zdrscl.f zgbcon.f zgbtf2.f zgbtrf.f zgbtrs.f \ + dtzrzf.f dzsum1.f ieeeck.f ilaenv.f iparmq.f izmax1.f \ + sbdsqr.f sgbcon.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f \ + sgebd2.f sgebrd.f sgecon.f sgeesx.f sgeev.f sgehd2.f sgehrd.f \ + sgelq2.f sgelqf.f sgelsd.f sgelss.f sgelsy.f sgeqp3.f sgeqpf.f \ + sgeqr2.f sgeqrf.f sgesv.f sgesvd.f sgetf2.f sgetrf.f sgetri.f \ + sgetrs.f sggbak.f sggbal.f sgghrd.f sgtsv.f sgttrf.f sgttrs.f \ + sgtts2.f shgeqz.f shseqr.f slabad.f slabrd.f 
slacn2.f slacon.f \ + slacpy.f sladiv.f slae2.f slaed6.f slaev2.f slaexc.f slag2.f \ + slahqr.f slahr2.f slahrd.f slaic1.f slaln2.f slals0.f slalsa.f \ + slalsd.f slamc1.f slamc2.f slamc3.f slamc4.f slamc5.f slamch.f \ + slamrg.f slange.f slanhs.f slanst.f slansy.f slantr.f slanv2.f \ + slapy2.f slapy3.f slaqp2.f slaqps.f slaqr0.f slaqr1.f slaqr2.f \ + slaqr3.f slaqr4.f slaqr5.f slarf.f slarfb.f slarfg.f slarft.f \ + slarfx.f slartg.f slarz.f slarzb.f slarzt.f slas2.f slascl.f \ + slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f \ + slasd7.f slasd8.f slasda.f slasdq.f slasdt.f slaset.f slasq1.f \ + slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f slasr.f slasrt.f \ + slassq.f slasv2.f slaswp.f slasy2.f slatbs.f slatrd.f slatrs.f \ + slatrz.f slauu2.f slauum.f slazq3.f slazq4.f sorg2l.f sorg2r.f \ + sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgtr.f \ + sorm2r.f sormbr.f sorml2.f sormlq.f sormqr.f sormr3.f sormrz.f \ + spbcon.f spbtf2.f spbtrf.f spbtrs.f spocon.f spotf2.f spotrf.f \ + spotri.f spotrs.f sptsv.f spttrf.f spttrs.f sptts2.f srscl.f \ + ssteqr.f ssterf.f ssyev.f ssytd2.f ssytrd.f stgevc.f strcon.f \ + strevc.f strexc.f strsen.f strsyl.f strti2.f strtri.f strtrs.f \ + stzrzf.f scsum1.f zbdsqr.f zdrscl.f zgbcon.f zgbtf2.f zgbtrf.f zgbtrs.f \ zgebak.f zgebal.f zgebd2.f zgebrd.f zgecon.f zgeesx.f zgeev.f \ zgehd2.f zgehrd.f zgelq2.f zgelqf.f zgelsd.f zgelss.f zgelsy.f \ zgeqp3.f zgeqpf.f zgeqr2.f zgeqrf.f zgesv.f zgesvd.f zgetf2.f \ @@ -73,6 +115,7 @@ include $(TOPDIR)/Makeconf dlamc1.o pic/dlamc1.o: FFLAGS += $(F77_FLOAT_STORE_FLAG) +slamc1.o pic/slamc1.o: FFLAGS += $(F77_FLOAT_STORE_FLAG) include ../Makerules diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cbdsqr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cbdsqr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,742 @@ + SUBROUTINE CBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, + $ LDU, C, LDC, RWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* 
Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), RWORK( * ) + COMPLEX C( LDC, * ), U( LDU, * ), VT( LDVT, * ) +* .. +* +* Purpose +* ======= +* +* CBDSQR computes the singular values and, optionally, the right and/or +* left singular vectors from the singular value decomposition (SVD) of +* a real N-by-N (upper or lower) bidiagonal matrix B using the implicit +* zero-shift QR algorithm. The SVD of B has the form +* +* B = Q * S * P**H +* +* where S is the diagonal matrix of singular values, Q is an orthogonal +* matrix of left singular vectors, and P is an orthogonal matrix of +* right singular vectors. If left singular vectors are requested, this +* subroutine actually returns U*Q instead of Q, and, if right singular +* vectors are requested, this subroutine returns P**H*VT instead of +* P**H, for given complex input matrices U and VT. When U and VT are +* the unitary matrices that reduce a general matrix A to bidiagonal +* form: A = U*B*VT, as computed by CGEBRD, then +* +* A = (U*Q) * S * (P**H*VT) +* +* is the SVD of A. Optionally, the subroutine may also compute Q**H*C +* for a given complex input matrix C. +* +* See "Computing Small Singular Values of Bidiagonal Matrices With +* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +* LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, +* no. 5, pp. 873-912, Sept 1990) and +* "Accurate singular values and differential qd algorithms," by +* B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics +* Department, University of California at Berkeley, July 1992 +* for a detailed description of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': B is upper bidiagonal; +* = 'L': B is lower bidiagonal. +* +* N (input) INTEGER +* The order of the matrix B. N >= 0. 
+* +* NCVT (input) INTEGER +* The number of columns of the matrix VT. NCVT >= 0. +* +* NRU (input) INTEGER +* The number of rows of the matrix U. NRU >= 0. +* +* NCC (input) INTEGER +* The number of columns of the matrix C. NCC >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the bidiagonal matrix B. +* On exit, if INFO=0, the singular values of B in decreasing +* order. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the N-1 offdiagonal elements of the bidiagonal +* matrix B. +* On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E +* will contain the diagonal and superdiagonal elements of a +* bidiagonal matrix orthogonally equivalent to the one given +* as input. +* +* VT (input/output) COMPLEX array, dimension (LDVT, NCVT) +* On entry, an N-by-NCVT matrix VT. +* On exit, VT is overwritten by P**H * VT. +* Not referenced if NCVT = 0. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. +* LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. +* +* U (input/output) COMPLEX array, dimension (LDU, N) +* On entry, an NRU-by-N matrix U. +* On exit, U is overwritten by U * Q. +* Not referenced if NRU = 0. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,NRU). +* +* C (input/output) COMPLEX array, dimension (LDC, NCC) +* On entry, an N-by-NCC matrix C. +* On exit, C is overwritten by Q**H * C. +* Not referenced if NCC = 0. +* +* LDC (input) INTEGER +* The leading dimension of the array C. +* LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. 
+* +* RWORK (workspace) REAL array, dimension (2*N) +* if NCVT = NRU = NCC = 0, (max(1, 4*N-4)) otherwise +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: If INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm did not converge; D and E contain the +* elements of a bidiagonal matrix which is orthogonally +* similar to the input matrix B; if INFO = i, i +* elements of E have not converged to zero. +* +* Internal Parameters +* =================== +* +* TOLMUL REAL, default = max(10,min(100,EPS**(-1/8))) +* TOLMUL controls the convergence criterion of the QR loop. +* If it is positive, TOLMUL*EPS is the desired relative +* precision in the computed singular values. +* If it is negative, abs(TOLMUL*EPS*sigma_max) is the +* desired absolute accuracy in the computed singular +* values (corresponds to relative accuracy +* abs(TOLMUL*EPS) in the largest singular value. +* abs(TOLMUL) should be between 1 and 1/EPS, and preferably +* between 10 (for fast convergence) and .1/EPS +* (for there to be some accuracy in the results). +* Default is to lose at either one eighth or 2 of the +* available decimal digits in each computed singular value +* (whichever is smaller). +* +* MAXITR INTEGER, default = 6 +* MAXITR controls the maximum number of passes of the +* algorithm through its inner loop. The algorithms stops +* (and so fails to converge) if the number of passes +* through the inner loop exceeds MAXITR*N**2. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL NEGONE + PARAMETER ( NEGONE = -1.0E0 ) + REAL HNDRTH + PARAMETER ( HNDRTH = 0.01E0 ) + REAL TEN + PARAMETER ( TEN = 10.0E0 ) + REAL HNDRD + PARAMETER ( HNDRD = 100.0E0 ) + REAL MEIGTH + PARAMETER ( MEIGTH = -0.125E0 ) + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) +* .. +* .. Local Scalars .. 
+ LOGICAL LOWER, ROTATE + INTEGER I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1, + $ NM12, NM13, OLDLL, OLDM + REAL ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU, + $ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL, + $ SINR, SLL, SMAX, SMIN, SMINL, SMINOA, + $ SN, THRESH, TOL, TOLMUL, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLASR, CSROT, CSSCAL, CSWAP, SLARTG, SLAS2, + $ SLASQ1, SLASV2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -3 + ELSE IF( NRU.LT.0 ) THEN + INFO = -4 + ELSE IF( NCC.LT.0 ) THEN + INFO = -5 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -11 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CBDSQR', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) + $ GO TO 160 +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. 
( NCC.GT.0 ) +* +* If no singular vectors desired, use qd algorithm +* + IF( .NOT.ROTATE ) THEN + CALL SLASQ1( N, D, E, RWORK, INFO ) + RETURN + END IF +* + NM1 = N - 1 + NM12 = NM1 + NM1 + NM13 = NM12 + NM1 + IDIR = 0 +* +* Get machine constants +* + EPS = SLAMCH( 'Epsilon' ) + UNFL = SLAMCH( 'Safe minimum' ) +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + IF( LOWER ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + RWORK( I ) = CS + RWORK( NM1+I ) = SN + 10 CONTINUE +* +* Update singular vectors if desired +* + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'F', NRU, N, RWORK( 1 ), RWORK( N ), + $ U, LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', N, NCC, RWORK( 1 ), RWORK( N ), + $ C, LDC ) + END IF +* +* Compute singular values to relative accuracy TOL +* (By setting TOL to be negative, algorithm will compute +* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) +* + TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) ) + TOL = TOLMUL*EPS +* +* Compute approximate maximum, minimum singular values +* + SMAX = ZERO + DO 20 I = 1, N + SMAX = MAX( SMAX, ABS( D( I ) ) ) + 20 CONTINUE + DO 30 I = 1, N - 1 + SMAX = MAX( SMAX, ABS( E( I ) ) ) + 30 CONTINUE + SMINL = ZERO + IF( TOL.GE.ZERO ) THEN +* +* Relative accuracy desired +* + SMINOA = ABS( D( 1 ) ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + MU = SMINOA + DO 40 I = 2, N + MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) ) + SMINOA = MIN( SMINOA, MU ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + SMINOA = SMINOA / SQRT( REAL( N ) ) + THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL ) + ELSE +* +* Absolute accuracy desired +* + THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL ) + END IF +* +* Prepare for main iteration loop for the singular values +* (MAXIT is the maximum number of passes through the inner +* loop permitted before nonconvergence signalled.) 
+* + MAXIT = MAXITR*N*N + ITER = 0 + OLDLL = -1 + OLDM = -1 +* +* M points to last element of unconverged part of matrix +* + M = N +* +* Begin main iteration loop +* + 60 CONTINUE +* +* Check for convergence or exceeding iteration count +* + IF( M.LE.1 ) + $ GO TO 160 + IF( ITER.GT.MAXIT ) + $ GO TO 200 +* +* Find diagonal block of matrix to work on +* + IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH ) + $ D( M ) = ZERO + SMAX = ABS( D( M ) ) + SMIN = SMAX + DO 70 LLL = 1, M - 1 + LL = M - LLL + ABSS = ABS( D( LL ) ) + ABSE = ABS( E( LL ) ) + IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH ) + $ D( LL ) = ZERO + IF( ABSE.LE.THRESH ) + $ GO TO 80 + SMIN = MIN( SMIN, ABSS ) + SMAX = MAX( SMAX, ABSS, ABSE ) + 70 CONTINUE + LL = 0 + GO TO 90 + 80 CONTINUE + E( LL ) = ZERO +* +* Matrix splits since E(LL) = 0 +* + IF( LL.EQ.M-1 ) THEN +* +* Convergence of bottom singular value, return to top of loop +* + M = M - 1 + GO TO 60 + END IF + 90 CONTINUE + LL = LL + 1 +* +* E(LL) through E(M-1) are nonzero, E(LL-1) is zero +* + IF( LL.EQ.M-1 ) THEN +* +* 2 by 2 block, handle separately +* + CALL SLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR, + $ COSR, SINL, COSL ) + D( M-1 ) = SIGMX + E( M-1 ) = ZERO + D( M ) = SIGMN +* +* Compute singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL CSROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, + $ COSR, SINR ) + IF( NRU.GT.0 ) + $ CALL CSROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL ) + IF( NCC.GT.0 ) + $ CALL CSROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL, + $ SINL ) + M = M - 2 + GO TO 60 + END IF +* +* If working on new submatrix, choose shift direction +* (from larger end diagonal element towards smaller) +* + IF( LL.GT.OLDM .OR. 
M.LT.OLDLL ) THEN + IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN +* +* Chase bulge from top (big end) to bottom (small end) +* + IDIR = 1 + ELSE +* +* Chase bulge from bottom (big end) to top (small end) +* + IDIR = 2 + END IF + END IF +* +* Apply convergence tests +* + IF( IDIR.EQ.1 ) THEN +* +* Run convergence test in forward direction +* First apply standard test to bottom of matrix +* + IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN + E( M-1 ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion forward +* + MU = ABS( D( LL ) ) + SMINL = MU + DO 100 LLL = LL, M - 1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 100 CONTINUE + END IF +* + ELSE +* +* Run convergence test in backward direction +* First apply standard test to top of matrix +* + IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN + E( LL ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion backward +* + MU = ABS( D( M ) ) + SMINL = MU + DO 110 LLL = M - 1, LL, -1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 110 CONTINUE + END IF + END IF + OLDLL = LL + OLDM = M +* +* Compute shift. First, test if shifting would ruin relative +* accuracy, and if so set the shift to zero. +* + IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE. 
+ $ MAX( EPS, HNDRTH*TOL ) ) THEN +* +* Use a zero shift to avoid loss of relative accuracy +* + SHIFT = ZERO + ELSE +* +* Compute the shift from 2-by-2 block at end of matrix +* + IF( IDIR.EQ.1 ) THEN + SLL = ABS( D( LL ) ) + CALL SLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R ) + ELSE + SLL = ABS( D( M ) ) + CALL SLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R ) + END IF +* +* Test if shift negligible, and if so set to zero +* + IF( SLL.GT.ZERO ) THEN + IF( ( SHIFT / SLL )**2.LT.EPS ) + $ SHIFT = ZERO + END IF + END IF +* +* Increment iteration count +* + ITER = ITER + M - LL +* +* If SHIFT = 0, do simplified QR iteration +* + IF( SHIFT.EQ.ZERO ) THEN + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 120 I = LL, M - 1 + CALL SLARTG( D( I )*CS, E( I ), CS, SN, R ) + IF( I.GT.LL ) + $ E( I-1 ) = OLDSN*R + CALL SLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) ) + RWORK( I-LL+1 ) = CS + RWORK( I-LL+1+NM1 ) = SN + RWORK( I-LL+1+NM12 ) = OLDCS + RWORK( I-LL+1+NM13 ) = OLDSN + 120 CONTINUE + H = D( M )*CS + D( M ) = H*OLDCS + E( M-1 ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', M-LL+1, NCVT, RWORK( 1 ), + $ RWORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'F', NRU, M-LL+1, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', M-LL+1, NCC, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 130 I = M, LL + 1, -1 + CALL SLARTG( D( I )*CS, E( I-1 ), CS, SN, R ) + IF( I.LT.M ) + $ E( I ) = OLDSN*R + CALL SLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) ) + RWORK( I-LL ) = CS + RWORK( I-LL+NM1 ) = -SN + RWORK( I-LL+NM12 ) = OLDCS + 
RWORK( I-LL+NM13 ) = -OLDSN + 130 CONTINUE + H = D( LL )*CS + D( LL ) = H*OLDCS + E( LL ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL CLASR( 'L', 'V', 'B', M-LL+1, NCVT, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'B', NRU, M-LL+1, RWORK( 1 ), + $ RWORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'B', M-LL+1, NCC, RWORK( 1 ), + $ RWORK( N ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO + END IF + ELSE +* +* Use nonzero shift +* + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( LL ) )-SHIFT )* + $ ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) ) + G = E( LL ) + DO 140 I = LL, M - 1 + CALL SLARTG( F, G, COSR, SINR, R ) + IF( I.GT.LL ) + $ E( I-1 ) = R + F = COSR*D( I ) + SINR*E( I ) + E( I ) = COSR*E( I ) - SINR*D( I ) + G = SINR*D( I+1 ) + D( I+1 ) = COSR*D( I+1 ) + CALL SLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I ) + SINL*D( I+1 ) + D( I+1 ) = COSL*D( I+1 ) - SINL*E( I ) + IF( I.LT.M-1 ) THEN + G = SINL*E( I+1 ) + E( I+1 ) = COSL*E( I+1 ) + END IF + RWORK( I-LL+1 ) = COSR + RWORK( I-LL+1+NM1 ) = SINR + RWORK( I-LL+1+NM12 ) = COSL + RWORK( I-LL+1+NM13 ) = SINL + 140 CONTINUE + E( M-1 ) = F +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', M-LL+1, NCVT, RWORK( 1 ), + $ RWORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'F', NRU, M-LL+1, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', M-LL+1, NCC, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT / + $ D( M ) ) 
+ G = E( M-1 ) + DO 150 I = M, LL + 1, -1 + CALL SLARTG( F, G, COSR, SINR, R ) + IF( I.LT.M ) + $ E( I ) = R + F = COSR*D( I ) + SINR*E( I-1 ) + E( I-1 ) = COSR*E( I-1 ) - SINR*D( I ) + G = SINR*D( I-1 ) + D( I-1 ) = COSR*D( I-1 ) + CALL SLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I-1 ) + SINL*D( I-1 ) + D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 ) + IF( I.GT.LL+1 ) THEN + G = SINL*E( I-2 ) + E( I-2 ) = COSL*E( I-2 ) + END IF + RWORK( I-LL ) = COSR + RWORK( I-LL+NM1 ) = -SINR + RWORK( I-LL+NM12 ) = COSL + RWORK( I-LL+NM13 ) = -SINL + 150 CONTINUE + E( LL ) = F +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO +* +* Update singular vectors if desired +* + IF( NCVT.GT.0 ) + $ CALL CLASR( 'L', 'V', 'B', M-LL+1, NCVT, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'B', NRU, M-LL+1, RWORK( 1 ), + $ RWORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'B', M-LL+1, NCC, RWORK( 1 ), + $ RWORK( N ), C( LL, 1 ), LDC ) + END IF + END IF +* +* QR iteration finished, go back and check convergence +* + GO TO 60 +* +* All singular values converged, so make them positive +* + 160 CONTINUE + DO 170 I = 1, N + IF( D( I ).LT.ZERO ) THEN + D( I ) = -D( I ) +* +* Change sign of singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL CSSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT ) + END IF + 170 CONTINUE +* +* Sort the singular values into decreasing order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 190 I = 1, N - 1 +* +* Scan for smallest D(I) +* + ISUB = 1 + SMIN = D( 1 ) + DO 180 J = 2, N + 1 - I + IF( D( J ).LE.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 180 CONTINUE + IF( ISUB.NE.N+1-I ) THEN +* +* Swap singular values and vectors +* + D( ISUB ) = D( N+1-I ) + D( N+1-I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL CSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ), + $ LDVT ) + IF( NRU.GT.0 ) + $ CALL CSWAP( NRU, U( 1, ISUB ), 1, U( 
1, N+1-I ), 1 ) + IF( NCC.GT.0 ) + $ CALL CSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC ) + END IF + 190 CONTINUE + GO TO 220 +* +* Maximum number of iterations exceeded, failure to converge +* + 200 CONTINUE + INFO = 0 + DO 210 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 210 CONTINUE + 220 CONTINUE + RETURN +* +* End of CBDSQR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgbcon.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgbcon.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,234 @@ + SUBROUTINE CGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, + $ WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* Modified to call CLACN2 in place of CLACON, 10 Feb 03, SJH. +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, KL, KU, LDAB, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL RWORK( * ) + COMPLEX AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGBCON estimates the reciprocal of the condition number of a complex +* general band matrix A, in either the 1-norm or the infinity-norm, +* using the LU factorization computed by CGBTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* Details of the LU factorization of the band matrix A, as +* computed by CGBTRF. 
U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= N, row i of the matrix was +* interchanged with row IPIV(i). +* +* ANORM (input) REAL +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, ONENRM + CHARACTER NORMIN + INTEGER IX, J, JP, KASE, KASE1, KD, LM + REAL AINVNM, SCALE, SMLNUM + COMPLEX T, ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + COMPLEX CDOTC + EXTERNAL LSAME, ICAMAX, SLAMCH, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CLACN2, CLATBS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. 
.NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KD = KL + KU + 1 + LNOTI = KL.GT.0 + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + IF( LNOTI ) THEN + DO 20 J = 1, N - 1 + LM = MIN( KL, N-J ) + JP = IPIV( J ) + T = WORK( JP ) + IF( JP.NE.J ) THEN + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + CALL CAXPY( LM, -T, AB( KD+1, J ), 1, WORK( J+1 ), 1 ) + 20 CONTINUE + END IF +* +* Multiply by inv(U). +* + CALL CLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(U'). +* + CALL CLATBS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KL+KU, AB, LDAB, WORK, SCALE, RWORK, + $ INFO ) +* +* Multiply by inv(L'). +* + IF( LNOTI ) THEN + DO 30 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + WORK( J ) = WORK( J ) - CDOTC( LM, AB( KD+1, J ), 1, + $ WORK( J+1 ), 1 ) + JP = IPIV( J ) + IF( JP.NE.J ) THEN + T = WORK( JP ) + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + 30 CONTINUE + END IF + END IF +* +* Divide X by 1/SCALE if doing so will not cause overflow. +* + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. 
SCALE.EQ.ZERO ) + $ GO TO 40 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 40 CONTINUE + RETURN +* +* End of CGBCON +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgbtf2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgbtf2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,202 @@ + SUBROUTINE CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CGBTF2 computes an LU factorization of a complex m-by-n band matrix +* A using partial pivoting with row interchanges. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. 
+* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U, because of fill-in resulting from the row +* interchanges. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, JP, JU, KM, KV +* .. +* .. External Functions .. + INTEGER ICAMAX + EXTERNAL ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CGERU, CSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in. +* + KV = KU + KL +* +* Test the input parameters. 
+* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero. +* + DO 20 J = KU + 2, MIN( KV, N ) + DO 10 I = KV - J + 2, KL + AB( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* JU is the index of the last column affected by the current stage +* of the factorization. +* + JU = 1 +* + DO 40 J = 1, MIN( M, N ) +* +* Set fill-in elements in column J+KV to zero. +* + IF( J+KV.LE.N ) THEN + DO 30 I = 1, KL + AB( I, J+KV ) = ZERO + 30 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-J ) + JP = ICAMAX( KM+1, AB( KV+1, J ), 1 ) + IPIV( J ) = JP + J - 1 + IF( AB( KV+JP, J ).NE.ZERO ) THEN + JU = MAX( JU, MIN( J+KU+JP-1, N ) ) +* +* Apply interchange to columns J to JU. +* + IF( JP.NE.1 ) + $ CALL CSWAP( JU-J+1, AB( KV+JP, J ), LDAB-1, + $ AB( KV+1, J ), LDAB-1 ) + IF( KM.GT.0 ) THEN +* +* Compute multipliers. +* + CALL CSCAL( KM, ONE / AB( KV+1, J ), AB( KV+2, J ), 1 ) +* +* Update trailing submatrix within the band. +* + IF( JU.GT.J ) + $ CALL CGERU( KM, JU-J, -ONE, AB( KV+2, J ), 1, + $ AB( KV, J+1 ), LDAB-1, AB( KV+1, J+1 ), + $ LDAB-1 ) + END IF + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. 
+* + IF( INFO.EQ.0 ) + $ INFO = J + END IF + 40 CONTINUE + RETURN +* +* End of CGBTF2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgbtrf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgbtrf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,442 @@ + SUBROUTINE CGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CGBTRF computes an LU factorization of a complex m-by-n band matrix A +* using partial pivoting with row interchanges. +* +* This is the blocked version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. 
+* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U because of fill-in resulting from the row interchanges. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP, + $ JU, K2, KM, KV, NB, NW + COMPLEX TEMP +* .. +* .. Local Arrays .. + COMPLEX WORK13( LDWORK, NBMAX ), + $ WORK31( LDWORK, NBMAX ) +* .. +* .. External Functions .. + INTEGER ICAMAX, ILAENV + EXTERNAL ICAMAX, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGBTF2, CGEMM, CGERU, CLASWP, CSCAL, + $ CSWAP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. 
+* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'CGBTRF', ' ', M, N, KL, KU ) +* +* The block size must not exceed the limit set by the size of the +* local arrays WORK13 and WORK31. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KL ) THEN +* +* Use unblocked code +* + CALL CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) + ELSE +* +* Use blocked code +* +* Zero the superdiagonal elements of the work array WORK13 +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK13( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Zero the subdiagonal elements of the work array WORK31 +* + DO 40 J = 1, NB + DO 30 I = J + 1, NB + WORK31( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero +* + DO 60 J = KU + 2, MIN( KV, N ) + DO 50 I = KV - J + 2, KL + AB( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* +* JU is the index of the last column affected by the current +* stage of the factorization +* + JU = 1 +* + DO 180 J = 1, MIN( M, N ), NB + JB = MIN( NB, MIN( M, N )-J+1 ) +* +* The active part of the matrix is partitioned +* +* A11 A12 A13 +* A21 A22 A23 +* A31 A32 A33 +* +* Here A11, A21 and A31 denote the current block of JB columns +* which is about to be factorized. The number of rows in the +* partitioning are JB, I2, I3 respectively, and the numbers +* of columns are JB, J2, J3. The superdiagonal elements of A13 +* and the subdiagonal elements of A31 lie outside the band. 
+* + I2 = MIN( KL-JB, M-J-JB+1 ) + I3 = MIN( JB, M-J-KL+1 ) +* +* J2 and J3 are computed after JU has been updated. +* +* Factorize the current block of JB columns +* + DO 80 JJ = J, J + JB - 1 +* +* Set fill-in elements in column JJ+KV to zero +* + IF( JJ+KV.LE.N ) THEN + DO 70 I = 1, KL + AB( I, JJ+KV ) = ZERO + 70 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-JJ ) + JP = ICAMAX( KM+1, AB( KV+1, JJ ), 1 ) + IPIV( JJ ) = JP + JJ - J + IF( AB( KV+JP, JJ ).NE.ZERO ) THEN + JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to J+JB-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* + CALL CSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange affects columns J to JJ-1 of A31 +* which are stored in the work array WORK31 +* + CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + CALL CSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1, + $ AB( KV+JP, JJ ), LDAB-1 ) + END IF + END IF +* +* Compute multipliers +* + CALL CSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), + $ 1 ) +* +* Update trailing submatrix within the band and within +* the current block. JM is the index of the last column +* which needs to be updated. +* + JM = MIN( JU, J+JB-1 ) + IF( JM.GT.JJ ) + $ CALL CGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, + $ AB( KV, JJ+1 ), LDAB-1, + $ AB( KV+1, JJ+1 ), LDAB-1 ) + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = JJ + END IF +* +* Copy current column of A31 into the work array WORK31 +* + NW = MIN( JJ-J+1, I3 ) + IF( NW.GT.0 ) + $ CALL CCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, + $ WORK31( 1, JJ-J+1 ), 1 ) + 80 CONTINUE + IF( J+JB.LE.N ) THEN +* +* Apply the row interchanges to the other blocks. 
+* + J2 = MIN( JU-J+1, KV ) - JB + J3 = MAX( 0, JU-J-KV+1 ) +* +* Use CLASWP to apply the row interchanges to A12, A22, and +* A32. +* + CALL CLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB, + $ IPIV( J ), 1 ) +* +* Adjust the pivot indices. +* + DO 90 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 90 CONTINUE +* +* Apply the row interchanges to A13, A23, and A33 +* columnwise. +* + K2 = J - 1 + JB + J2 + DO 110 I = 1, J3 + JJ = K2 + I + DO 100 II = J + I - 1, J + JB - 1 + IP = IPIV( II ) + IF( IP.NE.II ) THEN + TEMP = AB( KV+1+II-JJ, JJ ) + AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ ) + AB( KV+1+IP-JJ, JJ ) = TEMP + END IF + 100 CONTINUE + 110 CONTINUE +* +* Update the relevant part of the trailing submatrix +* + IF( J2.GT.0 ) THEN +* +* Update A12 +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A22 +* + CALL CGEMM( 'No transpose', 'No transpose', I2, J2, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+1, J+JB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A32 +* + CALL CGEMM( 'No transpose', 'No transpose', I3, J2, + $ JB, -ONE, WORK31, LDWORK, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) + END IF + END IF +* + IF( J3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array +* WORK13 +* + DO 130 JJ = 1, J3 + DO 120 II = JJ, JB + WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) + 120 CONTINUE + 130 CONTINUE +* +* Update A13 in the work array +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, + $ WORK13, LDWORK ) +* + IF( I2.GT.0 ) THEN +* +* Update A23 +* + CALL CGEMM( 'No transpose', 'No transpose', I2, J3, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), + $ LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A33 +* + CALL CGEMM( 'No transpose', 'No transpose', I3, J3, + 
$ JB, -ONE, WORK31, LDWORK, WORK13, + $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) + END IF +* +* Copy the lower triangle of A13 back into place +* + DO 150 JJ = 1, J3 + DO 140 II = JJ, JB + AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) + 140 CONTINUE + 150 CONTINUE + END IF + ELSE +* +* Adjust the pivot indices. +* + DO 160 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 160 CONTINUE + END IF +* +* Partially undo the interchanges in the current block to +* restore the upper triangular form of A31 and copy the upper +* triangle of A31 back into place +* + DO 170 JJ = J + JB - 1, J, -1 + JP = IPIV( JJ ) - JJ + 1 + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to JJ-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* +* The interchange does not affect A31 +* + CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange does affect A31 +* + CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + END IF + END IF +* +* Copy the current column of A31 back into place +* + NW = MIN( I3, JJ-J+1 ) + IF( NW.GT.0 ) + $ CALL CCOPY( NW, WORK31( 1, JJ-J+1 ), 1, + $ AB( KV+KL+1-JJ+J, JJ ), 1 ) + 170 CONTINUE + 180 CONTINUE + END IF +* + RETURN +* +* End of CGBTRF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgbtrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgbtrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,214 @@ + SUBROUTINE CGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. 
+* +* Purpose +* ======= +* +* CGBTRS solves a system of linear equations +* A * X = B, A**T * X = B, or A**H * X = B +* with a general band matrix A using the LU factorization computed +* by CGBTRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* Details of the LU factorization of the band matrix A, as +* computed by CGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= N, row i of the matrix was +* interchanged with row IPIV(i). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, NOTRAN + INTEGER I, J, KD, L, LM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. 
External Subroutines .. + EXTERNAL CGEMV, CGERU, CLACGV, CSWAP, CTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.( 2*KL+KU+1 ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + KD = KU + KL + 1 + LNOTI = KL.GT.0 +* + IF( NOTRAN ) THEN +* +* Solve A*X = B. +* +* Solve L*X = B, overwriting B with X. +* +* L is represented as a product of permutations and unit lower +* triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), +* where each transformation L(i) is a rank-one modification of +* the identity matrix. +* + IF( LNOTI ) THEN + DO 10 J = 1, N - 1 + LM = MIN( KL, N-J ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL CSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + CALL CGERU( LM, NRHS, -ONE, AB( KD+1, J ), 1, B( J, 1 ), + $ LDB, B( J+1, 1 ), LDB ) + 10 CONTINUE + END IF +* + DO 20 I = 1, NRHS +* +* Solve U*X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'No transpose', 'Non-unit', N, KL+KU, + $ AB, LDAB, B( 1, I ), 1 ) + 20 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * X = B. +* + DO 30 I = 1, NRHS +* +* Solve U**T * X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'Transpose', 'Non-unit', N, KL+KU, AB, + $ LDAB, B( 1, I ), 1 ) + 30 CONTINUE +* +* Solve L**T * X = B, overwriting B with X. 
+* + IF( LNOTI ) THEN + DO 40 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL CGEMV( 'Transpose', LM, NRHS, -ONE, B( J+1, 1 ), + $ LDB, AB( KD+1, J ), 1, ONE, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL CSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 40 CONTINUE + END IF +* + ELSE +* +* Solve A**H * X = B. +* + DO 50 I = 1, NRHS +* +* Solve U**H * X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'Conjugate transpose', 'Non-unit', N, + $ KL+KU, AB, LDAB, B( 1, I ), 1 ) + 50 CONTINUE +* +* Solve L**H * X = B, overwriting B with X. +* + IF( LNOTI ) THEN + DO 60 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL CLACGV( NRHS, B( J, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', LM, NRHS, -ONE, + $ B( J+1, 1 ), LDB, AB( KD+1, J ), 1, ONE, + $ B( J, 1 ), LDB ) + CALL CLACGV( NRHS, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL CSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 60 CONTINUE + END IF + END IF + RETURN +* +* End of CGBTRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgebak.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgebak.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,189 @@ + SUBROUTINE CGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + REAL SCALE( * ) + COMPLEX V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* CGEBAK forms the right or left eigenvectors of a complex general +* matrix by backward transformation on the computed eigenvectors of the +* balanced matrix output by CGEBAL. 
+* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the type of backward transformation required: +* = 'N', do nothing, return immediately; +* = 'P', do backward transformation for permutation only; +* = 'S', do backward transformation for scaling only; +* = 'B', do backward transformations for both permutation and +* scaling. +* JOB must be the same as the argument JOB supplied to CGEBAL. +* +* SIDE (input) CHARACTER*1 +* = 'R': V contains right eigenvectors; +* = 'L': V contains left eigenvectors. +* +* N (input) INTEGER +* The number of rows of the matrix V. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* The integers ILO and IHI determined by CGEBAL. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* SCALE (input) REAL array, dimension (N) +* Details of the permutation and scaling factors, as returned +* by CGEBAL. +* +* M (input) INTEGER +* The number of columns of the matrix V. M >= 0. +* +* V (input/output) COMPLEX array, dimension (LDV,M) +* On entry, the matrix of right or left eigenvectors to be +* transformed, as returned by CHSEIN or CTREVC. +* On exit, V is overwritten by the transformed eigenvectors. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, II, K + REAL S +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. 
.NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -7 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + S = SCALE( I ) + CALL CSSCAL( M, S, V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + S = ONE / SCALE( I ) + CALL CSSCAL( M, S, V( I, 1 ), LDV ) + 20 CONTINUE + END IF +* + END IF +* +* Backward permutation +* +* For I = ILO-1 step -1 until 1, +* IHI+1 step 1 until N do -- +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN + IF( RIGHTV ) THEN + DO 40 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 40 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 50 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 50 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 50 + CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 50 CONTINUE + END IF + END IF +* + RETURN +* +* End of CGEBAK +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgebal.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgebal.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,330 @@ + SUBROUTINE CGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. 
of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + REAL SCALE( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CGEBAL balances a general complex matrix A. This involves, first, +* permuting A by a similarity transformation to isolate eigenvalues +* in the first 1 to ILO-1 and last IHI+1 to N elements on the +* diagonal; and second, applying a diagonal similarity transformation +* to rows and columns ILO to IHI to make the rows and columns as +* close in norm as possible. Both steps are optional. +* +* Balancing may reduce the 1-norm of the matrix, and improve the +* accuracy of the computed eigenvalues and/or eigenvectors. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the operations to be performed on A: +* = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 +* for i = 1,...,N; +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the input matrix A. +* On exit, A is overwritten by the balanced matrix. +* If JOB = 'N', A is not referenced. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ILO (output) INTEGER +* IHI (output) INTEGER +* ILO and IHI are set to integers such that on exit +* A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. +* If JOB = 'N' or 'S', ILO = 1 and IHI = N. +* +* SCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied to +* A. If P(j) is the index of the row and column interchanged +* with row and column j and D(j) is the scaling factor +* applied to row and column j, then +* SCALE(j) = P(j) for j = 1,...,ILO-1 +* = D(j) for j = ILO,...,IHI +* = P(j) for j = IHI+1,...,N. 
+* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The permutations consist of row and column interchanges which put +* the matrix in the form +* +* ( T1 X Y ) +* P A P = ( 0 B Z ) +* ( 0 0 T2 ) +* +* where T1 and T2 are upper triangular matrices whose eigenvalues lie +* along the diagonal. The column indices ILO and IHI mark the starting +* and ending columns of the submatrix B. Balancing consists of applying +* a diagonal similarity transformation inv(D) * B * D to make the +* 1-norms of each row of B and its corresponding column nearly equal. +* The output matrix is +* +* ( T1 X*D Y ) +* ( 0 inv(D)*B*D inv(D)*Z ). +* ( 0 0 T2 ) +* +* Information about the permutations P and the diagonal matrix D is +* returned in the vector SCALE. +* +* This subroutine is based on the EISPACK routine CBAL. +* +* Modified by Tzu-Yi Chen, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL SCLFAC + PARAMETER ( SCLFAC = 2.0E+0 ) + REAL FACTOR + PARAMETER ( FACTOR = 0.95E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOCONV + INTEGER I, ICA, IEXC, IRA, J, K, L, M + REAL C, CA, F, G, R, RA, S, SFMAX1, SFMAX2, SFMIN1, + $ SFMIN2 + COMPLEX CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. 
+* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEBAL', -INFO ) + RETURN + END IF +* + K = 1 + L = N +* + IF( N.EQ.0 ) + $ GO TO 210 +* + IF( LSAME( JOB, 'N' ) ) THEN + DO 10 I = 1, N + SCALE( I ) = ONE + 10 CONTINUE + GO TO 210 + END IF +* + IF( LSAME( JOB, 'S' ) ) + $ GO TO 120 +* +* Permutation to isolate eigenvalues if possible +* + GO TO 50 +* +* Row and column exchange. +* + 20 CONTINUE + SCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 30 +* + CALL CSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL CSWAP( N-K+1, A( J, K ), LDA, A( M, K ), LDA ) +* + 30 CONTINUE + GO TO ( 40, 80 )IEXC +* +* Search for rows isolating an eigenvalue and push them down. +* + 40 CONTINUE + IF( L.EQ.1 ) + $ GO TO 210 + L = L - 1 +* + 50 CONTINUE + DO 70 J = L, 1, -1 +* + DO 60 I = 1, L + IF( I.EQ.J ) + $ GO TO 60 + IF( REAL( A( J, I ) ).NE.ZERO .OR. AIMAG( A( J, I ) ).NE. + $ ZERO )GO TO 70 + 60 CONTINUE +* + M = L + IEXC = 1 + GO TO 20 + 70 CONTINUE +* + GO TO 90 +* +* Search for columns isolating an eigenvalue and push them left. +* + 80 CONTINUE + K = K + 1 +* + 90 CONTINUE + DO 110 J = K, L +* + DO 100 I = K, L + IF( I.EQ.J ) + $ GO TO 100 + IF( REAL( A( I, J ) ).NE.ZERO .OR. AIMAG( A( I, J ) ).NE. + $ ZERO )GO TO 110 + 100 CONTINUE +* + M = K + IEXC = 2 + GO TO 20 + 110 CONTINUE +* + 120 CONTINUE + DO 130 I = K, L + SCALE( I ) = ONE + 130 CONTINUE +* + IF( LSAME( JOB, 'P' ) ) + $ GO TO 210 +* +* Balance the submatrix in rows K to L. +* +* Iterative loop for norm reduction +* + SFMIN1 = SLAMCH( 'S' ) / SLAMCH( 'P' ) + SFMAX1 = ONE / SFMIN1 + SFMIN2 = SFMIN1*SCLFAC + SFMAX2 = ONE / SFMIN2 + 140 CONTINUE + NOCONV = .FALSE. 
+* + DO 200 I = K, L + C = ZERO + R = ZERO +* + DO 150 J = K, L + IF( J.EQ.I ) + $ GO TO 150 + C = C + CABS1( A( J, I ) ) + R = R + CABS1( A( I, J ) ) + 150 CONTINUE + ICA = ICAMAX( L, A( 1, I ), 1 ) + CA = ABS( A( ICA, I ) ) + IRA = ICAMAX( N-K+1, A( I, K ), LDA ) + RA = ABS( A( I, IRA+K-1 ) ) +* +* Guard against zero C or R due to underflow. +* + IF( C.EQ.ZERO .OR. R.EQ.ZERO ) + $ GO TO 200 + G = R / SCLFAC + F = ONE + S = C + R + 160 CONTINUE + IF( C.GE.G .OR. MAX( F, C, CA ).GE.SFMAX2 .OR. + $ MIN( R, G, RA ).LE.SFMIN2 )GO TO 170 + F = F*SCLFAC + C = C*SCLFAC + CA = CA*SCLFAC + R = R / SCLFAC + G = G / SCLFAC + RA = RA / SCLFAC + GO TO 160 +* + 170 CONTINUE + G = C / SCLFAC + 180 CONTINUE + IF( G.LT.R .OR. MAX( R, RA ).GE.SFMAX2 .OR. + $ MIN( F, C, G, CA ).LE.SFMIN2 )GO TO 190 + F = F / SCLFAC + C = C / SCLFAC + G = G / SCLFAC + CA = CA / SCLFAC + R = R*SCLFAC + RA = RA*SCLFAC + GO TO 180 +* +* Now balance. +* + 190 CONTINUE + IF( ( C+R ).GE.FACTOR*S ) + $ GO TO 200 + IF( F.LT.ONE .AND. SCALE( I ).LT.ONE ) THEN + IF( F*SCALE( I ).LE.SFMIN1 ) + $ GO TO 200 + END IF + IF( F.GT.ONE .AND. SCALE( I ).GT.ONE ) THEN + IF( SCALE( I ).GE.SFMAX1 / F ) + $ GO TO 200 + END IF + G = ONE / F + SCALE( I ) = SCALE( I )*F + NOCONV = .TRUE. +* + CALL CSSCAL( N-K+1, G, A( I, K ), LDA ) + CALL CSSCAL( L, F, A( 1, I ), 1 ) +* + 200 CONTINUE +* + IF( NOCONV ) + $ GO TO 140 +* + 210 CONTINUE + ILO = K + IHI = L +* + RETURN +* +* End of CGEBAL +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgebd2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgebd2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,250 @@ + SUBROUTINE CGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. 
+ REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAUP( * ), TAUQ( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEBD2 reduces a complex general m by n matrix A to upper or lower +* real bidiagonal form B by a unitary transformation: Q' * A * P = B. +* +* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns in the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the m by n general matrix to be reduced. +* On exit, +* if m >= n, the diagonal and the first superdiagonal are +* overwritten with the upper bidiagonal matrix B; the +* elements below the diagonal, with the array TAUQ, represent +* the unitary matrix Q as a product of elementary +* reflectors, and the elements above the first superdiagonal, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors; +* if m < n, the diagonal and the first subdiagonal are +* overwritten with the lower bidiagonal matrix B; the +* elements below the first subdiagonal, with the array TAUQ, +* represent the unitary matrix Q as a product of +* elementary reflectors, and the elements above the diagonal, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) REAL array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (min(M,N)-1) +* The off-diagonal elements of the bidiagonal matrix B: +* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +* +* TAUQ (output) COMPLEX array dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Q. See Further Details. 
+* +* TAUP (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix P. See Further Details. +* +* WORK (workspace) COMPLEX array, dimension (max(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* If m >= n, +* +* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, and v and u are complex +* vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in +* A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in +* A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, +* +* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, v and u are complex vectors; +* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The contents of A on exit are illustrated by the following examples: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +* ( v1 v2 v3 v4 v5 ) +* +* where d and e denote diagonal and off-diagonal elements of B, vi +* denotes an element of the vector defining H(i), and ui an element of +* the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. 
+ COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'CGEBD2', -INFO ) + RETURN + END IF +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, N +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + ALPHA = A( I, I ) + CALL CLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = ALPHA + A( I, I ) = ONE +* +* Apply H(i)' to A(i:m,i+1:n) from the left +* + IF( I.LT.N ) + $ CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAUQ( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.N ) THEN +* +* Generate elementary reflector G(i) to annihilate +* A(i,i+2:n) +* + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + ALPHA = A( I, I+1 ) + CALL CLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = ALPHA + A( I, I+1 ) = ONE +* +* Apply G(i) to A(i+1:m,i+1:n) from the right +* + CALL CLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, + $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + A( I, I+1 ) = E( I ) + ELSE + TAUP( I ) = ZERO + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, M +* +* Generate elementary reflector G(i) to annihilate A(i,i+1:n) +* + CALL CLACGV( N-I+1, A( I, I ), LDA ) + ALPHA = A( I, I ) + CALL CLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = ALPHA + A( I, I ) = ONE +* +* Apply G(i) to A(i+1:m,i:n) from the right +* + IF( I.LT.M ) + $ CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAUP( I ), A( I+1, 
I ), LDA, WORK ) + CALL CLACGV( N-I+1, A( I, I ), LDA ) + A( I, I ) = D( I ) +* + IF( I.LT.M ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:m,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Apply H(i)' to A(i+1:m,i+1:n) from the left +* + CALL CLARF( 'Left', M-I, N-I, A( I+1, I ), 1, + $ CONJG( TAUQ( I ) ), A( I+1, I+1 ), LDA, + $ WORK ) + A( I+1, I ) = E( I ) + ELSE + TAUQ( I ) = ZERO + END IF + 20 CONTINUE + END IF + RETURN +* +* End of CGEBD2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgebrd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgebrd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,269 @@ + SUBROUTINE CGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAUP( * ), TAUQ( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEBRD reduces a general complex M-by-N matrix A to upper or lower +* bidiagonal form B by a unitary transformation: Q**H * A * P = B. +* +* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns in the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N general matrix to be reduced. 
+* On exit, +* if m >= n, the diagonal and the first superdiagonal are +* overwritten with the upper bidiagonal matrix B; the +* elements below the diagonal, with the array TAUQ, represent +* the unitary matrix Q as a product of elementary +* reflectors, and the elements above the first superdiagonal, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors; +* if m < n, the diagonal and the first subdiagonal are +* overwritten with the lower bidiagonal matrix B; the +* elements below the first subdiagonal, with the array TAUQ, +* represent the unitary matrix Q as a product of +* elementary reflectors, and the elements above the diagonal, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) REAL array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (min(M,N)-1) +* The off-diagonal elements of the bidiagonal matrix B: +* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +* +* TAUQ (output) COMPLEX array dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Q. See Further Details. +* +* TAUP (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix P. See Further Details. +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,M,N). +* For optimum performance LWORK >= (M+N)*NB, where NB +* is the optimal blocksize. 
+* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* If m >= n, +* +* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, and v and u are complex +* vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in +* A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in +* A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, +* +* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, and v and u are complex +* vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in +* A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The contents of A on exit are illustrated by the following examples: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +* ( v1 v2 v3 v4 v5 ) +* +* where d and e denote diagonal and off-diagonal elements of B, vi +* denotes an element of the vector defining H(i), and ui an element of +* the vector defining G(i). 
+* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, + $ NBMIN, NX + REAL WS +* .. +* .. External Subroutines .. + EXTERNAL CGEBD2, CGEMM, CLABRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MAX( 1, ILAENV( 1, 'CGEBRD', ' ', M, N, -1, -1 ) ) + LWKOPT = ( M+N )*NB + WORK( 1 ) = REAL( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'CGEBRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + WS = MAX( M, N ) + LDWRKX = M + LDWRKY = N +* + IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN +* +* Set the crossover point NX. +* + NX = MAX( NB, ILAENV( 3, 'CGEBRD', ' ', M, N, -1, -1 ) ) +* +* Determine when to switch from blocked to unblocked code. +* + IF( NX.LT.MINMN ) THEN + WS = ( M+N )*NB + IF( LWORK.LT.WS ) THEN +* +* Not enough work space for the optimal NB, consider using +* a smaller block size. 
+* + NBMIN = ILAENV( 2, 'CGEBRD', ' ', M, N, -1, -1 ) + IF( LWORK.GE.( M+N )*NBMIN ) THEN + NB = LWORK / ( M+N ) + ELSE + NB = 1 + NX = MINMN + END IF + END IF + END IF + ELSE + NX = MINMN + END IF +* + DO 30 I = 1, MINMN - NX, NB +* +* Reduce rows and columns i:i+ib-1 to bidiagonal form and return +* the matrices X and Y which are needed to update the unreduced +* part of the matrix +* + CALL CLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, LDWRKX, + $ WORK( LDWRKX*NB+1 ), LDWRKY ) +* +* Update the trailing submatrix A(i+ib:m,i+ib:n), using +* an update of the form A := A - V*Y' - X*U' +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M-I-NB+1, + $ N-I-NB+1, NB, -ONE, A( I+NB, I ), LDA, + $ WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE, + $ A( I+NB, I+NB ), LDA ) + CALL CGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA, + $ ONE, A( I+NB, I+NB ), LDA ) +* +* Copy diagonal and off-diagonal elements of B back into A +* + IF( M.GE.N ) THEN + DO 10 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J, J+1 ) = E( J ) + 10 CONTINUE + ELSE + DO 20 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J+1, J ) = E( J ) + 20 CONTINUE + END IF + 30 CONTINUE +* +* Use unblocked code to reduce the remainder of the matrix +* + CALL CGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, IINFO ) + WORK( 1 ) = WS + RETURN +* +* End of CGEBRD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgecon.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgecon.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,193 @@ + SUBROUTINE CGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* Modified to call CLACN2 in place of CLACON, 10 Feb 03, SJH. +* +* .. Scalar Arguments .. 
+ CHARACTER NORM + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGECON estimates the reciprocal of the condition number of a general +* complex matrix A, in either the 1-norm or the infinity-norm, using +* the LU factorization computed by CGETRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by CGETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) REAL +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, SCALE, SL, SMLNUM, SU + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. 
External Subroutines .. + EXTERNAL CLACN2, CLATRS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + CALL CLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, RWORK, INFO ) +* +* Multiply by inv(U). +* + CALL CLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SU, RWORK( N+1 ), INFO ) + ELSE +* +* Multiply by inv(U'). +* + CALL CLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SU, RWORK( N+1 ), + $ INFO ) +* +* Multiply by inv(L'). +* + CALL CLATRS( 'Lower', 'Conjugate transpose', 'Unit', NORMIN, + $ N, A, LDA, WORK, SL, RWORK, INFO ) + END IF +* +* Divide X by 1/(SL*SU) if doing so will not cause overflow. +* + SCALE = SL*SU + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. 
SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of CGECON +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgeesx.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgeesx.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,384 @@ + SUBROUTINE CGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, W, + $ VS, LDVS, RCONDE, RCONDV, WORK, LWORK, RWORK, + $ BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SENSE, SORT + INTEGER INFO, LDA, LDVS, LWORK, N, SDIM + REAL RCONDE, RCONDV +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* Purpose +* ======= +* +* CGEESX computes for an N-by-N complex nonsymmetric matrix A, the +* eigenvalues, the Schur form T, and, optionally, the matrix of Schur +* vectors Z. This gives the Schur factorization A = Z*T*(Z**H). +* +* Optionally, it also orders the eigenvalues on the diagonal of the +* Schur form so that selected eigenvalues are at the top left; +* computes a reciprocal condition number for the average of the +* selected eigenvalues (RCONDE); and computes a reciprocal condition +* number for the right invariant subspace corresponding to the +* selected eigenvalues (RCONDV). The leading columns of Z form an +* orthonormal basis for this invariant subspace. +* +* For further explanation of the reciprocal condition numbers RCONDE +* and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where +* these quantities are called s and sep respectively). +* +* A complex matrix is in Schur form if it is upper triangular. 
+* +* Arguments +* ========= +* +* JOBVS (input) CHARACTER*1 +* = 'N': Schur vectors are not computed; +* = 'V': Schur vectors are computed. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see SELECT). +* +* SELECT (external procedure) LOGICAL FUNCTION of one COMPLEX argument +* SELECT must be declared EXTERNAL in the calling subroutine. +* If SORT = 'S', SELECT is used to select eigenvalues to order +* to the top left of the Schur form. +* If SORT = 'N', SELECT is not referenced. +* An eigenvalue W(j) is selected if SELECT(W(j)) is true. +* +* SENSE (input) CHARACTER*1 +* Determines which reciprocal condition numbers are computed. +* = 'N': None are computed; +* = 'E': Computed for average of selected eigenvalues only; +* = 'V': Computed for selected right invariant subspace only; +* = 'B': Computed for both. +* If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the N-by-N matrix A. +* On exit, A is overwritten by its Schur form T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues for which +* SELECT is true. +* +* W (output) COMPLEX array, dimension (N) +* W contains the computed eigenvalues, in the same order +* that they appear on the diagonal of the output Schur form T. +* +* VS (output) COMPLEX array, dimension (LDVS,N) +* If JOBVS = 'V', VS contains the unitary matrix Z of Schur +* vectors. +* If JOBVS = 'N', VS is not referenced. +* +* LDVS (input) INTEGER +* The leading dimension of the array VS. LDVS >= 1, and if +* JOBVS = 'V', LDVS >= N. 
+* +* RCONDE (output) REAL +* If SENSE = 'E' or 'B', RCONDE contains the reciprocal +* condition number for the average of the selected eigenvalues. +* Not referenced if SENSE = 'N' or 'V'. +* +* RCONDV (output) REAL +* If SENSE = 'V' or 'B', RCONDV contains the reciprocal +* condition number for the selected right invariant subspace. +* Not referenced if SENSE = 'N' or 'E'. +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* Also, if SENSE = 'E' or 'V' or 'B', LWORK >= 2*SDIM*(N-SDIM), +* where SDIM is the number of selected eigenvalues computed by +* this routine. Note that 2*SDIM*(N-SDIM) <= N*N/2. Note also +* that an error is only returned if LWORK < max(1,2*N), but if +* SENSE = 'E' or 'V' or 'B' this may not be large enough. +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates upper bound on the optimal size of the +* array WORK, returns this value as the first entry of the WORK +* array, and no error message related to LWORK is issued by +* XERBLA. +* +* RWORK (workspace) REAL array, dimension (N) +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, and i is +* <= N: the QR algorithm failed to compute all the +* eigenvalues; elements 1:ILO-1 and i+1:N of W +* contain those eigenvalues which have converged; if +* JOBVS = 'V', VS contains the transformation which +* reduces A to its partially converged Schur form. 
+* = N+1: the eigenvalues could not be reordered because some +* eigenvalues were too close to separate (the problem +* is very ill-conditioned); +* = N+2: after reordering, roundoff changed values of some +* complex eigenvalues so that leading eigenvalues in +* the Schur form no longer satisfy SELECT=.TRUE. This +* could also be caused by underflow due to scaling. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL SCALEA, WANTSB, WANTSE, WANTSN, WANTST, + $ WANTSV, WANTVS + INTEGER HSWORK, I, IBAL, ICOND, IERR, IEVAL, IHI, ILO, + $ ITAU, IWRK, LWRK, MAXWRK, MINWRK + REAL ANRM, BIGNUM, CSCALE, EPS, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEBAK, CGEBAL, CGEHRD, CHSEQR, CLACPY, + $ CLASCL, CTRSEN, CUNGHR, SLABAD, SLASCL, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. 
LDVS.LT.N ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of real workspace needed at that point in the +* code, as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by CHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case. +* If SENSE = 'E', 'V' or 'B', then the amount of workspace needed +* depends on SDIM, which is computed by the routine CTRSEN later +* in the code.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + LWRK = 1 + ELSE + MAXWRK = N + N*ILAENV( 1, 'CGEHRD', ' ', N, 1, N, 0 ) + MINWRK = 2*N +* + CALL CHSEQR( 'S', JOBVS, N, 1, N, A, LDA, W, VS, LDVS, + $ WORK, -1, IEVAL ) + HSWORK = WORK( 1 ) +* + IF( .NOT.WANTVS ) THEN + MAXWRK = MAX( MAXWRK, HSWORK ) + ELSE + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'CUNGHR', + $ ' ', N, 1, N, -1 ) ) + MAXWRK = MAX( MAXWRK, HSWORK ) + END IF + LWRK = MAXWRK + IF( .NOT.WANTSN ) + $ LWRK = MAX( LWRK, ( N*N )/2 ) + END IF + WORK( 1 ) = LWRK +* + IF( LWORK.LT.MINWRK ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEESX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. 
+ CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* +* Permute the matrix to make it more nearly triangular +* (CWorkspace: none) +* (RWorkspace: need N) +* + IBAL = 1 + CALL CGEBAL( 'P', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = N + ITAU + CALL CGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL CLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate unitary matrix in VS +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, W, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. 
INFO.EQ.0 ) THEN + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, W, N, IERR ) + DO 10 I = 1, N + BWORK( I ) = SELECT( W( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Schur vectors, and compute +* reciprocal condition numbers +* (CWorkspace: if SENSE is not 'N', need 2*SDIM*(N-SDIM) +* otherwise, need none ) +* (RWorkspace: none) +* + CALL CTRSEN( SENSE, JOBVS, BWORK, N, A, LDA, VS, LDVS, W, SDIM, + $ RCONDE, RCONDV, WORK( IWRK ), LWORK-IWRK+1, + $ ICOND ) + IF( .NOT.WANTSN ) + $ MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) ) + IF( ICOND.EQ.-14 ) THEN +* +* Not enough complex workspace +* + INFO = -15 + END IF + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL CGEBAK( 'P', 'R', N, ILO, IHI, RWORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL CLASCL( 'U', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL CCOPY( N, A, LDA+1, W, 1 ) + IF( ( WANTSV .OR. WANTSB ) .AND. INFO.EQ.0 ) THEN + DUM( 1 ) = RCONDV + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + RCONDV = DUM( 1 ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of CGEESX +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgeev.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgeev.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,397 @@ + SUBROUTINE CGEEV( JOBVL, JOBVR, N, A, LDA, W, VL, LDVL, VR, LDVR, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + $ W( * ), WORK( * ) +* .. 
+* +* Purpose +* ======= +* +* CGEEV computes for an N-by-N complex nonsymmetric matrix A, the +* eigenvalues and, optionally, the left and/or right eigenvectors. +* +* The right eigenvector v(j) of A satisfies +* A * v(j) = lambda(j) * v(j) +* where lambda(j) is its eigenvalue. +* The left eigenvector u(j) of A satisfies +* u(j)**H * A = lambda(j) * u(j)**H +* where u(j)**H denotes the conjugate transpose of u(j). +* +* The computed eigenvectors are normalized to have Euclidean norm +* equal to 1 and largest component real. +* +* Arguments +* ========= +* +* JOBVL (input) CHARACTER*1 +* = 'N': left eigenvectors of A are not computed; +* = 'V': left eigenvectors of A are computed. +* +* JOBVR (input) CHARACTER*1 +* = 'N': right eigenvectors of A are not computed; +* = 'V': right eigenvectors of A are computed. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the N-by-N matrix A. +* On exit, A has been overwritten. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) COMPLEX array, dimension (N) +* W contains the computed eigenvalues. +* +* VL (output) COMPLEX array, dimension (LDVL,N) +* If JOBVL = 'V', the left eigenvectors u(j) are stored one +* after another in the columns of VL, in the same order +* as their eigenvalues. +* If JOBVL = 'N', VL is not referenced. +* u(j) = VL(:,j), the j-th column of VL. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1; if +* JOBVL = 'V', LDVL >= N. +* +* VR (output) COMPLEX array, dimension (LDVR,N) +* If JOBVR = 'V', the right eigenvectors v(j) are stored one +* after another in the columns of VR, in the same order +* as their eigenvalues. +* If JOBVR = 'N', VR is not referenced. +* v(j) = VR(:,j), the j-th column of VR. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1; if +* JOBVR = 'V', LDVR >= N. 
+* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the QR algorithm failed to compute all the +* eigenvalues, and no eigenvectors have been computed; +* elements i+1:N of W contain eigenvalues which have +* converged. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR + CHARACTER SIDE + INTEGER HSWORK, I, IBAL, IERR, IHI, ILO, IRWORK, ITAU, + $ IWRK, K, MAXWRK, MINWRK, NOUT + REAL ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM + COMPLEX TMP +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEBAK, CGEBAL, CGEHRD, CHSEQR, CLACPY, CLASCL, + $ CSCAL, CSSCAL, CTREVC, CUNGHR, SLABAD, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV, ISAMAX + REAL CLANGE, SCNRM2, SLAMCH + EXTERNAL LSAME, ILAENV, ISAMAX, CLANGE, SCNRM2, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, CONJG, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + IF( ( .NOT.WANTVL ) .AND. 
( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -10 + END IF + +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by CHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MAXWRK = N + N*ILAENV( 1, 'CGEHRD', ' ', N, 1, N, 0 ) + MINWRK = 2*N + IF( WANTVL ) THEN + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'CUNGHR', + $ ' ', N, 1, N, -1 ) ) + CALL CHSEQR( 'S', 'V', N, 1, N, A, LDA, W, VL, LDVL, + $ WORK, -1, INFO ) + ELSE IF( WANTVR ) THEN + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'CUNGHR', + $ ' ', N, 1, N, -1 ) ) + CALL CHSEQR( 'S', 'V', N, 1, N, A, LDA, W, VR, LDVR, + $ WORK, -1, INFO ) + ELSE + CALL CHSEQR( 'E', 'N', N, 1, N, A, LDA, W, VR, LDVR, + $ WORK, -1, INFO ) + END IF + HSWORK = WORK( 1 ) + MAXWRK = MAX( MAXWRK, HSWORK, MINWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. 
.NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix +* (CWorkspace: none) +* (RWorkspace: need N) +* + IBAL = 1 + CALL CGEBAL( 'B', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = ITAU + N + CALL CGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL CLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate unitary matrix in VL +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL CLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* 
+ SIDE = 'R' + CALL CLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate unitary matrix in VR +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'E', 'N', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO > 0 from CHSEQR, then quit +* + IF( INFO.GT.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (CWorkspace: need 2*N) +* (RWorkspace: need 2*N) +* + IRWORK = IBAL + N + CALL CTREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), RWORK( IRWORK ), IERR ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL CGEBAK( 'B', 'L', N, ILO, IHI, RWORK( IBAL ), N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + SCL = ONE / SCNRM2( N, VL( 1, I ), 1 ) + CALL CSSCAL( N, SCL, VL( 1, I ), 1 ) + DO 10 K = 1, N + RWORK( IRWORK+K-1 ) = REAL( VL( K, I ) )**2 + + $ AIMAG( VL( K, I ) )**2 + 10 CONTINUE + K = ISAMAX( N, RWORK( IRWORK ), 1 ) + TMP = CONJG( VL( K, I ) ) / SQRT( RWORK( IRWORK+K-1 ) ) + CALL CSCAL( N, TMP, VL( 1, I ), 1 ) + VL( K, I ) = CMPLX( REAL( VL( K, I ) ), ZERO ) + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL CGEBAK( 'B', 'R', N, ILO, IHI, RWORK( IBAL ), N, VR, LDVR, + $ IERR ) +* +* Normalize right 
eigenvectors and make largest component real +* + DO 40 I = 1, N + SCL = ONE / SCNRM2( N, VR( 1, I ), 1 ) + CALL CSSCAL( N, SCL, VR( 1, I ), 1 ) + DO 30 K = 1, N + RWORK( IRWORK+K-1 ) = REAL( VR( K, I ) )**2 + + $ AIMAG( VR( K, I ) )**2 + 30 CONTINUE + K = ISAMAX( N, RWORK( IRWORK ), 1 ) + TMP = CONJG( VR( K, I ) ) / SQRT( RWORK( IRWORK+K-1 ) ) + CALL CSCAL( N, TMP, VR( 1, I ), 1 ) + VR( K, I ) = CMPLX( REAL( VR( K, I ) ), ZERO ) + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, W( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.GT.0 ) THEN + CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, W, N, IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of CGEEV +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgehd2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgehd2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,148 @@ + SUBROUTINE CGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEHD2 reduces a complex general matrix A to upper Hessenberg form H +* by a unitary similarity transformation: Q' * A * Q = H . +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to CGEBAL; otherwise they should be +* set to 1 and N respectively. See Further Details. +* 1 <= ILO <= IHI <= max(1,N). +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the n by n general matrix to be reduced. 
+* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* elements below the first subdiagonal, with the array TAU, +* represent the unitary matrix Q as a product of elementary +* reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) COMPLEX array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of (ihi-ilo) elementary +* reflectors +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +* exit in A(i+2:ihi,i), and tau in TAU(i). +* +* The contents of A are illustrated by the following example, with +* n = 7, ilo = 2 and ihi = 6: +* +* on entry, on exit, +* +* ( a a a a a a a ) ( a a h h h h a ) +* ( a a a a a a ) ( a h h h h a ) +* ( a a a a a a ) ( h h h h h h ) +* ( a a a a a a ) ( v2 h h h h h ) +* ( a a a a a a ) ( v2 v3 h h h h ) +* ( a a a a a a ) ( v2 v3 v4 h h h ) +* ( a ) ( a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. 
+* +* Test the input parameters +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEHD2', -INFO ) + RETURN + END IF +* + DO 10 I = ILO, IHI - 1 +* +* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( IHI-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAU( I ) ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(1:ihi,i+1:ihi) from the right +* + CALL CLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), + $ A( 1, I+1 ), LDA, WORK ) +* +* Apply H(i)' to A(i+1:ihi,i+1:n) from the left +* + CALL CLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, + $ CONJG( TAU( I ) ), A( I+1, I+1 ), LDA, WORK ) +* + A( I+1, I ) = ALPHA + 10 CONTINUE +* + RETURN +* +* End of CGEHD2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgehrd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgehrd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,273 @@ + SUBROUTINE CGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEHRD reduces a complex general matrix A to upper Hessenberg form H by +* an unitary similarity transformation: Q' * A * Q = H . +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to CGEBAL; otherwise they should be +* set to 1 and N respectively. See Further Details. 
+* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the N-by-N general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* elements below the first subdiagonal, with the array TAU, +* represent the unitary matrix Q as a product of elementary +* reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) COMPLEX array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to +* zero. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of (ihi-ilo) elementary +* reflectors +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +* exit in A(i+2:ihi,i), and tau in TAU(i). 
+* +* The contents of A are illustrated by the following example, with +* n = 7, ilo = 2 and ihi = 6: +* +* on entry, on exit, +* +* ( a a a a a a a ) ( a a h h h h a ) +* ( a a a a a a ) ( a h h h h a ) +* ( a a a a a a ) ( h h h h h h ) +* ( a a a a a a ) ( v2 h h h h h ) +* ( a a a a a a ) ( v2 v3 h h h h ) +* ( a a a a a a ) ( v2 v3 v4 h h h ) +* ( a ) ( a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* This file is a slight modification of LAPACK-3.0's CGEHRD +* subroutine incorporating improvements proposed by Quintana-Orti and +* Van de Geijn (2005). +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB, + $ NBMIN, NH, NX + COMPLEX EI +* .. +* .. Local Arrays .. + COMPLEX T( LDT, NBMAX ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CGEHD2, CGEMM, CLAHR2, CLARFB, CTRMM, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. 
.NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEHRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero +* + DO 10 I = 1, ILO - 1 + TAU( I ) = ZERO + 10 CONTINUE + DO 20 I = MAX( 1, IHI ), N - 1 + TAU( I ) = ZERO + 20 CONTINUE +* +* Quick return if possible +* + NH = IHI - ILO + 1 + IF( NH.LE.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the block size +* + NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) + NBMIN = 2 + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.NH ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code) +* + NX = MAX( NB, ILAENV( 3, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) + IF( NX.LT.NH ) THEN +* +* Determine if workspace is large enough for blocked code +* + IWS = N*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code +* + NBMIN = MAX( 2, ILAENV( 2, 'CGEHRD', ' ', N, ILO, IHI, + $ -1 ) ) + IF( LWORK.GE.N*NBMIN ) THEN + NB = LWORK / N + ELSE + NB = 1 + END IF + END IF + END IF + END IF + LDWORK = N +* + IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN +* +* Use unblocked code below +* + I = ILO +* + ELSE +* +* Use blocked code +* + DO 40 I = ILO, IHI - 1 - NX, NB + IB = MIN( NB, IHI-I ) +* +* Reduce columns i:i+ib-1 to Hessenberg form, returning the +* matrices V and T of the block reflector H = I - V*T*V' +* which performs the reduction, and also the matrix Y = A*V*T +* + CALL CLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT, + $ WORK, LDWORK ) +* +* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the +* right, computing A := A - Y * V'. 
V(i+ib,ib-1) must be set +* to 1 +* + EI = A( I+IB, I+IB-1 ) + A( I+IB, I+IB-1 ) = ONE + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ IHI, IHI-I-IB+1, + $ IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE, + $ A( 1, I+IB ), LDA ) + A( I+IB, I+IB-1 ) = EI +* +* Apply the block reflector H to A(1:i,i+1:i+ib-1) from the +* right +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', I, IB-1, + $ ONE, A( I+1, I ), LDA, WORK, LDWORK ) + DO 30 J = 0, IB-2 + CALL CAXPY( I, -ONE, WORK( LDWORK*J+1 ), 1, + $ A( 1, I+J+1 ), 1 ) + 30 CONTINUE +* +* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the +* left +* + CALL CLARFB( 'Left', 'Conjugate transpose', 'Forward', + $ 'Columnwise', + $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT, + $ A( I+1, I+IB ), LDA, WORK, LDWORK ) + 40 CONTINUE + END IF +* +* Use unblocked code to reduce the rest of the matrix +* + CALL CGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) + WORK( 1 ) = IWS +* + RETURN +* +* End of CGEHRD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgelq2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgelq2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,123 @@ + SUBROUTINE CGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGELQ2 computes an LQ factorization of a complex m by n matrix A: +* A = L * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the m by n matrix A. 
+* On exit, the elements on and below the diagonal of the array +* contain the m by min(m,n) lower trapezoidal matrix L (L is +* lower triangular if m <= n); the elements above the diagonal, +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) COMPLEX array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in +* A(i,i+1:n), and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i,i+1:n) +* + CALL CLACGV( N-I+1, A( I, I ), LDA ) + ALPHA = A( I, I ) + CALL CLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAU( I ) ) + IF( I.LT.M ) THEN +* +* Apply H(i) to A(i+1:m,i:n) from the right +* + A( I, I ) = ONE + CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), + $ A( I+1, I ), LDA, WORK ) + END IF + A( I, I ) = ALPHA + CALL CLACGV( N-I+1, A( I, I ), LDA ) + 10 CONTINUE + RETURN +* +* End of CGELQ2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgelqf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgelqf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,195 @@ + SUBROUTINE CGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGELQF computes an LQ factorization of a complex M-by-N matrix A: +* A = L * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the elements on and below the diagonal of the array +* contain the m-by-min(m,n) lower trapezoidal matrix L (L is +* lower triangular if m <= n); the elements above the diagonal, +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors (see Further Details). 
+* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in +* A(i,i+1:n), and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CGELQ2, CLARFB, CLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGELQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGELQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the LQ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL CGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i+ib:m,i:n) from the right +* + CALL CLARFB( 'Right', 'No transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. 
+* + IF( I.LE.K ) + $ CALL CGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of CGELQF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgelsd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgelsd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,567 @@ + SUBROUTINE CGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), S( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGELSD computes the minimum-norm solution to a real linear least +* squares problem: +* minimize 2-norm(| b - A*x |) +* using the singular value decomposition (SVD) of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The problem is solved in three steps: +* (1) Reduce the coefficient matrix A to bidiagonal form with +* Householder tranformations, reducing the original problem +* into a "bidiagonal least squares problem" (BLS) +* (2) Solve the BLS using a divide and conquer approach. +* (3) Apply back all the Householder tranformations to solve +* the original least squares problem. +* +* The effective rank of A is determined by treating as zero those +* singular values which are less than RCOND times the largest singular +* value. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. 
It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, B is overwritten by the N-by-NRHS solution matrix X. +* If m >= n and RANK = n, the residual sum-of-squares for +* the solution in the i-th column is given by the sum of +* squares of the modulus of elements n+1:m in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* S (output) REAL array, dimension (min(M,N)) +* The singular values of A in decreasing order. +* The condition number of A in the 2-norm = S(1)/S(min(m,n)). +* +* RCOND (input) REAL +* RCOND is used to determine the effective rank of A. +* Singular values S(i) <= RCOND*S(1) are treated as zero. +* If RCOND < 0, machine precision is used instead. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the number of singular values +* which are greater than RCOND*S(1). +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK must be at least 1. 
+* The exact minimum amount of workspace needed depends on M, +* N and NRHS. As long as LWORK is at least +* 2 * N + N * NRHS +* if M is greater than or equal to N or +* 2 * M + M * NRHS +* if M is less than N, the code will execute correctly. +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the array WORK and the +* minimum sizes of the arrays RWORK and IWORK, and returns +* these values as the first entries of the WORK, RWORK and +* IWORK arrays, and no error message related to LWORK is issued +* by XERBLA. +* +* RWORK (workspace) REAL array, dimension (MAX(1,LRWORK)) +* LRWORK >= +* 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + +* (SMLSIZ+1)**2 +* if M is greater than or equal to N or +* 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + +* (SMLSIZ+1)**2 +* if M is less than N, the code will execute correctly. +* SMLSIZ is returned by ILAENV and is equal to the maximum +* size of the subproblems at the bottom of the computation +* tree (usually about 25), and +* NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +* On exit, if INFO = 0, RWORK(1) returns the minimum LRWORK. +* +* IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) +* LIWORK >= max(1, 3*MINMN*NLVL + 11*MINMN), +* where MINMN = MIN( M,N ). +* On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the algorithm for computing the SVD failed to converge; +* if INFO = i, i off-diagonal elements of an intermediate +* bidiagonal form did not converge to zero. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. 
Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ, + $ LDWORK, LIWORK, LRWORK, MAXMN, MAXWRK, MINMN, + $ MINWRK, MM, MNTHR, NLVL, NRWORK, NWORK, SMLSIZ + REAL ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL CGEBRD, CGELQF, CGEQRF, CLACPY, + $ CLALSD, CLASCL, CLASET, CUNMBR, + $ CUNMLQ, CUNMQR, SLABAD, SLASCL, + $ SLASET, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL CLANGE, SLAMCH, ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, LOG, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace. +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + LIWORK = 1 + LRWORK = 1 + IF( MINMN.GT.0 ) THEN + SMLSIZ = ILAENV( 9, 'CGELSD', ' ', 0, 0, 0, 0 ) + MNTHR = ILAENV( 6, 'CGELSD', ' ', M, N, NRHS, -1 ) + NLVL = MAX( INT( LOG( REAL( MINMN ) / REAL( SMLSIZ + 1 ) ) / + $ LOG( TWO ) ) + 1, 0 ) + LIWORK = 3*MINMN*NLVL + 11*MINMN + MM = M + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than +* columns. 
+* + MM = N + MAXWRK = MAX( MAXWRK, N*ILAENV( 1, 'CGEQRF', ' ', M, N, + $ -1, -1 ) ) + MAXWRK = MAX( MAXWRK, NRHS*ILAENV( 1, 'CUNMQR', 'LC', M, + $ NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + LRWORK = 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + + $ ( SMLSIZ + 1 )**2 + MAXWRK = MAX( MAXWRK, 2*N + ( MM + N )*ILAENV( 1, + $ 'CGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N + NRHS*ILAENV( 1, 'CUNMBR', + $ 'QLC', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'CUNMBR', 'PLN', N, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N + N*NRHS ) + MINWRK = MAX( 2*N + MM, 2*N + N*NRHS ) + END IF + IF( N.GT.M ) THEN + LRWORK = 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + + $ ( SMLSIZ + 1 )**2 + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows. +* + MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + 2*M*ILAENV( 1, + $ 'CGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + NRHS*ILAENV( 1, + $ 'CUNMBR', 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + ( M - 1 )*ILAENV( 1, + $ 'CUNMLQ', 'LC', N, NRHS, M, -1 ) ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M + 2*M ) + END IF + MAXWRK = MAX( MAXWRK, M*M + 4*M + M*NRHS ) + ELSE +* +* Path 2 - underdetermined. +* + MAXWRK = 2*M + ( N + M )*ILAENV( 1, 'CGEBRD', ' ', M, + $ N, -1, -1 ) + MAXWRK = MAX( MAXWRK, 2*M + NRHS*ILAENV( 1, 'CUNMBR', + $ 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M + M*ILAENV( 1, 'CUNMBR', + $ 'PLN', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M + M*NRHS ) + END IF + MINWRK = MAX( 2*M + N, 2*M + M*NRHS ) + END IF + END IF + MINWRK = MIN( MINWRK, MAXWRK ) + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWORK + RWORK( 1 ) = LRWORK +* + IF( LWORK.LT.MINWRK .AND. 
.NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters. +* + EPS = SLAMCH( 'P' ) + SFMIN = SLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max entry outside range [SMLNUM,BIGNUM]. +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) + RANK = 0 + GO TO 10 + END IF +* +* Scale B if max entry outside range [SMLNUM,BIGNUM]. +* + BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* If M < N make sure B(M+1:N,:) = 0 +* + IF( M.LT.N ) + $ CALL CLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) +* +* Overdetermined case. +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R. 
+* (RWorkspace: need N) +* (CWorkspace: need N, prefer N*NB) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose(Q). +* (RWorkspace: need N) +* (CWorkspace: need NRHS, prefer NRHS*NB) +* + CALL CUNMQR( 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Zero out below R. +* + IF( N.GT.1 ) THEN + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + END IF + END IF +* + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N + IE = 1 + NRWORK = IE + N +* +* Bidiagonalize R in A. +* (RWorkspace: need N) +* (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) +* + CALL CGEBRD( MM, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R. +* (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) +* + CALL CUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL CLALSD( 'U', SMLSIZ, N, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of R. +* + CALL CUNMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm. +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS ) )LDWORK = LDA + ITAU = 1 + NWORK = M + 1 +* +* Compute A=L*Q. 
+* (CWorkspace: need 2*M, prefer M+M*NB) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) + IL = NWORK +* +* Copy L to WORK(IL), zeroing out above its diagonal. +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, WORK( IL+LDWORK ), + $ LDWORK ) + ITAUQ = IL + LDWORK*M + ITAUP = ITAUQ + M + NWORK = ITAUP + M + IE = 1 + NRWORK = IE + M +* +* Bidiagonalize L in WORK(IL). +* (RWorkspace: need M) +* (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB) +* + CALL CGEBRD( M, M, WORK( IL ), LDWORK, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L. +* (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL CLALSD( 'U', SMLSIZ, M, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of L. +* + CALL CUNMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUP ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Zero out below first M rows of B. +* + CALL CLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) + NWORK = ITAU + M +* +* Multiply transpose(Q) by B. +* (CWorkspace: need NRHS, prefer NRHS*NB) +* + CALL CUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases. +* + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M + IE = 1 + NRWORK = IE + M +* +* Bidiagonalize A. 
+* (RWorkspace: need M) +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors. +* (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) +* + CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL CLALSD( 'L', SMLSIZ, M, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of A. +* + CALL CUNMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + END IF +* +* Undo scaling. +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 10 CONTINUE + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWORK + RWORK( 1 ) = LRWORK + RETURN +* +* End of CGELSD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgelss.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgelss.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,634 @@ + SUBROUTINE CGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. 
+ INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ), S( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGELSS computes the minimum norm solution to a complex linear +* least squares problem: +* +* Minimize 2-norm(| b - A*x |). +* +* using the singular value decomposition (SVD) of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix +* X. +* +* The effective rank of A is determined by treating as zero those +* singular values which are less than RCOND times the largest singular +* value. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the first min(m,n) rows of A are overwritten with +* its right singular vectors, stored rowwise. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, B is overwritten by the N-by-NRHS solution matrix X. +* If m >= n and RANK = n, the residual sum-of-squares for +* the solution in the i-th column is given by the sum of +* squares of the modulus of elements n+1:m in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* S (output) REAL array, dimension (min(M,N)) +* The singular values of A in decreasing order. +* The condition number of A in the 2-norm = S(1)/S(min(m,n)). 
+* +* RCOND (input) REAL +* RCOND is used to determine the effective rank of A. +* Singular values S(i) <= RCOND*S(1) are treated as zero. +* If RCOND < 0, machine precision is used instead. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the number of singular values +* which are greater than RCOND*S(1). +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1, and also: +* LWORK >= 2*min(M,N) + max(M,N,NRHS) +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (5*min(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the algorithm for computing the SVD failed to converge; +* if INFO = i, i off-diagonal elements of an intermediate +* bidiagonal form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER BL, CHUNK, I, IASCL, IBSCL, IE, IL, IRWORK, + $ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN, + $ MAXWRK, MINMN, MINWRK, MM, MNTHR + REAL ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR +* .. +* .. Local Arrays .. + COMPLEX VDUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CBDSQR, CCOPY, CGEBRD, CGELQF, CGEMM, CGEMV, + $ CGEQRF, CLACPY, CLASCL, CLASET, CSRSCL, CUNGBR, + $ CUNMBR, CUNMLQ, CUNMQR, SLABAD, SLASCL, SLASET, + $ XERBLA +* .. +* .. External Functions .. 
+ INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace refers +* to real workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( MINMN.GT.0 ) THEN + MM = M + MNTHR = ILAENV( 6, 'CGELSS', ' ', M, N, NRHS, -1 ) + IF( M.GE.N .AND. 
M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than +* columns +* + MM = N + MAXWRK = MAX( MAXWRK, N + N*ILAENV( 1, 'CGEQRF', ' ', M, + $ N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, N + NRHS*ILAENV( 1, 'CUNMQR', 'LC', + $ M, NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* + MAXWRK = MAX( MAXWRK, 2*N + ( MM + N )*ILAENV( 1, + $ 'CGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N + NRHS*ILAENV( 1, 'CUNMBR', + $ 'QLC', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + MINWRK = 2*N + MAX( NRHS, M ) + END IF + IF( N.GT.M ) THEN + MINWRK = 2*M + MAX( NRHS, N ) + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows +* + MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 3*M + M*M + 2*M*ILAENV( 1, + $ 'CGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M + M*M + NRHS*ILAENV( 1, + $ 'CUNMBR', 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M + M*M + ( M - 1 )*ILAENV( 1, + $ 'CUNGBR', 'P', M, M, M, -1 ) ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M + 2*M ) + END IF + MAXWRK = MAX( MAXWRK, M + NRHS*ILAENV( 1, 'CUNMLQ', + $ 'LC', N, NRHS, M, -1 ) ) + ELSE +* +* Path 2 - underdetermined +* + MAXWRK = 2*M + ( N + M )*ILAENV( 1, 'CGEBRD', ' ', M, + $ N, -1, -1 ) + MAXWRK = MAX( MAXWRK, 2*M + NRHS*ILAENV( 1, 'CUNMBR', + $ 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M + M*ILAENV( 1, 'CUNGBR', + $ 'P', M, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + END IF + END IF + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELSS', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + EPS = SLAMCH( 'P' ) + SFMIN = SLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN ) + RANK = 0 + GO TO 70 + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Overdetermined case +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose(Q) +* (CWorkspace: need N+NRHS, prefer N+NRHS*NB) +* (RWorkspace: none) +* + CALL CUNMQR( 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Zero out below R +* + IF( N.GT.1 ) + $ CALL CLASET( 'L', N-1, N-1, 
CZERO, CZERO, A( 2, 1 ), + $ LDA ) + END IF +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( MM, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R +* (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) +* (RWorkspace: none) +* + CALL CUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration +* multiply B by transpose of left singular vectors +* compute right singular vectors in A +* (CWorkspace: none) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, 0, NRHS, S, RWORK( IE ), A, LDA, VDUM, + $ 1, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 10 I = 1, N + IF( S( I ).GT.THR ) THEN + CALL CSRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL CLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 10 CONTINUE +* +* Multiply B by right singular vectors +* (CWorkspace: need N, prefer N*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS .AND. 
NRHS.GT.1 ) THEN + CALL CGEMM( 'C', 'N', N, NRHS, N, CONE, A, LDA, B, LDB, + $ CZERO, WORK, LDB ) + CALL CLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 20 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL CGEMM( 'C', 'N', N, BL, N, CONE, A, LDA, B( 1, I ), + $ LDB, CZERO, WORK, N ) + CALL CLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) + 20 CONTINUE + ELSE + CALL CGEMV( 'C', N, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 ) + CALL CCOPY( N, WORK, 1, B, 1 ) + END IF +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.3*M+M*M+MAX( M, NRHS, N-2*M ) ) + $ THEN +* +* Underdetermined case, M much less than N +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm +* + LDWORK = M + IF( LWORK.GE.3*M+M*LDA+MAX( M, NRHS, N-2*M ) ) + $ LDWORK = LDA + ITAU = 1 + IWORK = M + 1 +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: none) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) + IL = IWORK +* +* Copy L to WORK(IL), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = 1 + ITAUQ = IL + LDWORK*M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (CWorkspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IL ), LDWORK, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L +* (CWorkspace: need M*M+3*M+NRHS, prefer M*M+3*M+NRHS*NB) +* (RWorkspace: none) +* + CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in WORK(IL) +* (CWorkspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* (RWorkspace: none) +* + CALL CUNGBR( 
'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right singular +* vectors of L in WORK(IL) and multiplying B by transpose of +* left singular vectors +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, 0, NRHS, S, RWORK( IE ), WORK( IL ), + $ LDWORK, A, LDA, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 30 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL CSRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL CLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 30 CONTINUE + IWORK = IL + M*LDWORK +* +* Multiply B by right singular vectors of L in WORK(IL) +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. 
NRHS.GT.1 ) THEN + CALL CGEMM( 'C', 'N', M, NRHS, M, CONE, WORK( IL ), LDWORK, + $ B, LDB, CZERO, WORK( IWORK ), LDB ) + CALL CLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = ( LWORK-IWORK+1 ) / M + DO 40 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL CGEMM( 'C', 'N', M, BL, M, CONE, WORK( IL ), LDWORK, + $ B( 1, I ), LDB, CZERO, WORK( IWORK ), M ) + CALL CLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ), + $ LDB ) + 40 CONTINUE + ELSE + CALL CGEMV( 'C', M, M, CONE, WORK( IL ), LDWORK, B( 1, 1 ), + $ 1, CZERO, WORK( IWORK ), 1 ) + CALL CCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) + END IF +* +* Zero out below first M rows of B +* + CALL CLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) + IWORK = ITAU + M +* +* Multiply transpose(Q) by B +* (CWorkspace: need M+NRHS, prefer M+NHRS*NB) +* (RWorkspace: none) +* + CALL CUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 3*M, prefer 2*M+(M+N)*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors +* (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) +* (RWorkspace: none) +* + CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: none) +* + CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of A in A and +* multiplying B by transpose of left singular vectors +* (CWorkspace: none) +* (RWorkspace: need BDSPAC) +* + 
CALL CBDSQR( 'L', M, N, 0, NRHS, S, RWORK( IE ), A, LDA, VDUM, + $ 1, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 50 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL CSRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL CLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 50 CONTINUE +* +* Multiply B by right singular vectors of A +* (CWorkspace: need N, prefer N*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL CGEMM( 'C', 'N', N, NRHS, M, CONE, A, LDA, B, LDB, + $ CZERO, WORK, LDB ) + CALL CLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 60 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL CGEMM( 'C', 'N', N, BL, M, CONE, A, LDA, B( 1, I ), + $ LDB, CZERO, WORK, N ) + CALL CLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) + 60 CONTINUE + ELSE + CALL CGEMV( 'C', M, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 ) + CALL CCOPY( N, WORK, 1, B, 1 ) + END IF + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF + 70 CONTINUE + WORK( 1 ) = MAXWRK + RETURN +* +* End of CGELSS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgelsy.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgelsy.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,385 @@ + SUBROUTINE CGELSY( M, N, NRHS, A, LDA, 
B, LDB, JPVT, RCOND, RANK, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGELSY computes the minimum-norm solution to a complex linear least +* squares problem: +* minimize || A * X - B || +* using a complete orthogonal factorization of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The routine first computes a QR factorization with column pivoting: +* A * P = Q * [ R11 R12 ] +* [ 0 R22 ] +* with R11 defined as the largest leading submatrix whose estimated +* condition number is less than 1/RCOND. The order of R11, RANK, +* is the effective rank of A. +* +* Then, R22 is considered to be negligible, and R12 is annihilated +* by unitary transformations from the right, arriving at the +* complete orthogonal factorization: +* A * P = Q * [ T11 0 ] * Z +* [ 0 0 ] +* The minimum-norm solution is then +* X = P * Z' [ inv(T11)*Q1'*B ] +* [ 0 ] +* where Q1 consists of the first RANK columns of Q. +* +* This routine is basically identical to the original xGELSX except +* three differences: +* o The permutation of matrix B (the right hand side) is faster and +* more simple. +* o The call to the subroutine xGEQPF has been substituted by the +* the call to the subroutine xGEQP3. This subroutine is a Blas-3 +* version of the QR factorization with column pivoting. +* o Matrix B (the right hand side) is updated with Blas-3. 
+* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of +* columns of matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been overwritten by details of its +* complete orthogonal factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of AP, otherwise column i is a free column. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* RCOND (input) REAL +* RCOND is used to determine the effective rank of A, which +* is defined as the order of the largest leading triangular +* submatrix R11 in the QR factorization with pivoting of A, +* whose estimated condition number < 1/RCOND. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the order of the submatrix +* R11. This is the same as the order of the submatrix T11 +* in the complete orthogonal factorization of A. +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* The unblocked strategy requires that: +* LWORK >= MN + MAX( 2*MN, N+1, MN+NRHS ) +* where MN = min(M,N). 
+* The block algorithm requires that: +* LWORK >= MN + MAX( 2*MN, NB*(N+1), MN+MN*NB, MN+NB*NRHS ) +* where NB is an upper bound on the blocksize returned +* by ILAENV for the routines CGEQP3, CTZRZF, CTZRQF, CUNMQR, +* and CUNMRZ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKOPT, MN, + $ NB, NB1, NB2, NB3, NB4 + REAL ANRM, BIGNUM, BNRM, SMAX, SMAXPR, SMIN, SMINPR, + $ SMLNUM, WSIZE + COMPLEX C1, C2, S1, S2 +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEQP3, CLAIC1, CLASCL, CLASET, CTRSM, + $ CTZRZF, CUNMQR, CUNMRZ, SLABAD, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL CLANGE, ILAENV, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, CMPLX +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. 
+* + INFO = 0 + NB1 = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'CUNMQR', ' ', M, N, NRHS, -1 ) + NB4 = ILAENV( 1, 'CUNMRQ', ' ', M, N, NRHS, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = MAX( 1, MN+2*N+NB*(N+1), 2*MN+NB*NRHS ) + WORK( 1 ) = CMPLX( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.( MN+MAX( 2*MN, N+1, MN+NRHS ) ) .AND. + $ .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELSY', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max entries outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + RANK = 0 + GO TO 70 + END IF +* + BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. 
BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL CGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), + $ LWORK-MN, RWORK, INFO ) + WSIZE = MN + REAL( WORK( MN+1 ) ) +* +* complex workspace: MN+NB*(N+1). real workspace 2*N. +* Details of Householder rotations stored in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = CONE + WORK( ISMAX ) = CONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + GO TO 70 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL CLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL CLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* complex workspace: 3*MN. +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL CTZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) +* +* complex workspace: 2*MN. 
+* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) +* + CALL CUNMQR( 'Left', 'Conjugate transpose', M, NRHS, MN, A, LDA, + $ WORK( 1 ), B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + WSIZE = MAX( WSIZE, 2*MN+REAL( WORK( 2*MN+1 ) ) ) +* +* complex workspace: 2*MN+NB*NRHS. +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, CONE, A, LDA, B, LDB ) +* + DO 40 J = 1, NRHS + DO 30 I = RANK + 1, N + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + CALL CUNMRZ( 'Left', 'Conjugate transpose', N, NRHS, RANK, + $ N-RANK, A, LDA, WORK( MN+1 ), B, LDB, + $ WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + END IF +* +* complex workspace: 2*MN+NRHS. +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 60 J = 1, NRHS + DO 50 I = 1, N + WORK( JPVT( I ) ) = B( I, J ) + 50 CONTINUE + CALL CCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 ) + 60 CONTINUE +* +* complex workspace: N. +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL CLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL CLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = CMPLX( LWKOPT ) +* + RETURN +* +* End of CGELSY +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgeqp3.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgeqp3.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,293 @@ + SUBROUTINE CGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. 
of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEQP3 computes a QR factorization with column pivoting of a +* matrix A: A*P = Q*R using Level 3 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of the array contains the +* min(M,N)-by-N upper trapezoidal matrix R; the elements below +* the diagonal, together with the array TAU, represent the +* unitary matrix Q as a product of min(M,N) elementary +* reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(J).ne.0, the J-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(J)=0, +* the J-th column of A is a free column. +* On exit, if JPVT(J)=K, then the J-th column of A*P was the +* the K-th column of A. +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO=0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= N+1. +* For optimal performance LWORK >= ( N+1 )*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. 
+* +* RWORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real/complex scalar, and v is a real/complex vector +* with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in +* A(i+1:m,i), and tau in TAU(i). +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER INB, INBMIN, IXOVER + PARAMETER ( INB = 1, INBMIN = 2, IXOVER = 3 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB, + $ NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CLAQP2, CLAQPS, CSWAP, CUNMQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SCNRM2 + EXTERNAL ILAENV, SCNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test input arguments +* ==================== +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + IWS = 1 + LWKOPT = 1 + ELSE + IWS = N + 1 + NB = ILAENV( INB, 'CGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = ( N + 1 )*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQP3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. 
+* + IF( MINMN.EQ.0 ) THEN + RETURN + END IF +* +* Move initial columns up front. +* + NFXD = 1 + DO 10 J = 1, N + IF( JPVT( J ).NE.0 ) THEN + IF( J.NE.NFXD ) THEN + CALL CSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 ) + JPVT( J ) = JPVT( NFXD ) + JPVT( NFXD ) = J + ELSE + JPVT( J ) = J + END IF + NFXD = NFXD + 1 + ELSE + JPVT( J ) = J + END IF + 10 CONTINUE + NFXD = NFXD - 1 +* +* Factorize fixed columns +* ======================= +* +* Compute the QR factorization of fixed columns and update +* remaining columns. +* + IF( NFXD.GT.0 ) THEN + NA = MIN( M, NFXD ) +*CC CALL CGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) + CALL CGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + IF( NA.LT.N ) THEN +*CC CALL CUNM2R( 'Left', 'Conjugate Transpose', M, N-NA, +*CC $ NA, A, LDA, TAU, A( 1, NA+1 ), LDA, WORK, +*CC $ INFO ) + CALL CUNMQR( 'Left', 'Conjugate Transpose', M, N-NA, NA, A, + $ LDA, TAU, A( 1, NA+1 ), LDA, WORK, LWORK, + $ INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + END IF + END IF +* +* Factorize free columns +* ====================== +* + IF( NFXD.LT.MINMN ) THEN +* + SM = M - NFXD + SN = N - NFXD + SMINMN = MINMN - NFXD +* +* Determine the block size. +* + NB = ILAENV( INB, 'CGEQRF', ' ', SM, SN, -1, -1 ) + NBMIN = 2 + NX = 0 +* + IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( IXOVER, 'CGEQRF', ' ', SM, SN, -1, + $ -1 ) ) +* +* + IF( NX.LT.SMINMN ) THEN +* +* Determine if workspace is large enough for blocked code. +* + MINWS = ( SN+1 )*NB + IWS = MAX( IWS, MINWS ) + IF( LWORK.LT.MINWS ) THEN +* +* Not enough workspace to use optimal NB: Reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / ( SN+1 ) + NBMIN = MAX( 2, ILAENV( INBMIN, 'CGEQRF', ' ', SM, SN, + $ -1, -1 ) ) +* +* + END IF + END IF + END IF +* +* Initialize partial column norms. The first N elements of work +* store the exact column norms. 
+* + DO 20 J = NFXD + 1, N + RWORK( J ) = SCNRM2( SM, A( NFXD+1, J ), 1 ) + RWORK( N+J ) = RWORK( J ) + 20 CONTINUE +* + IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND. + $ ( NX.LT.SMINMN ) ) THEN +* +* Use blocked code initially. +* + J = NFXD + 1 +* +* Compute factorization: while loop. +* +* + TOPBMN = MINMN - NX + 30 CONTINUE + IF( J.LE.TOPBMN ) THEN + JB = MIN( NB, TOPBMN-J+1 ) +* +* Factorize JB columns among columns J:N. +* + CALL CLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA, + $ JPVT( J ), TAU( J ), RWORK( J ), + $ RWORK( N+J ), WORK( 1 ), WORK( JB+1 ), + $ N-J+1 ) +* + J = J + FJB + GO TO 30 + END IF + ELSE + J = NFXD + 1 + END IF +* +* Use unblocked code to factor the last or only block. +* +* + IF( J.LE.MINMN ) + $ CALL CLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ), + $ TAU( J ), RWORK( J ), RWORK( N+J ), WORK( 1 ) ) +* + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of CGEQP3 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgeqpf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgeqpf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,234 @@ + SUBROUTINE CGEQPF( M, N, A, LDA, JPVT, TAU, WORK, RWORK, INFO ) +* +* -- LAPACK deprecated driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine CGEQP3. +* +* CGEQPF computes a QR factorization with column pivoting of a +* complex M-by-N matrix A: A*P = Q*R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0 +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. 
+* On exit, the upper triangle of the array contains the +* min(M,N)-by-N upper triangular matrix R; the elements +* below the diagonal, together with the array TAU, +* represent the unitary matrix Q as a product of +* min(m,n) elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(i) = 0, +* the i-th column of A is a free column. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* RWORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(n) +* +* Each H(i) has the form +* +* H = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i). +* +* The matrix P is represented in jpvt as follows: If +* jpvt(j) = i +* then the jth column of P is the ith canonical unit vector. +* +* Partial column norm updating strategy modified by +* Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +* University of Zagreb, Croatia. +* June 2006. +* For more details see LAPACK Working Note 176. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MA, MN, PVT + REAL TEMP, TEMP2, TOL3Z + COMPLEX AII +* .. +* .. External Subroutines .. 
+ EXTERNAL CGEQR2, CLARF, CLARFG, CSWAP, CUNM2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, CONJG, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SCNRM2, SLAMCH + EXTERNAL ISAMAX, SCNRM2, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQPF', -INFO ) + RETURN + END IF +* + MN = MIN( M, N ) + TOL3Z = SQRT(SLAMCH('Epsilon')) +* +* Move initial columns up front +* + ITEMP = 1 + DO 10 I = 1, N + IF( JPVT( I ).NE.0 ) THEN + IF( I.NE.ITEMP ) THEN + CALL CSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 ) + JPVT( I ) = JPVT( ITEMP ) + JPVT( ITEMP ) = I + ELSE + JPVT( I ) = I + END IF + ITEMP = ITEMP + 1 + ELSE + JPVT( I ) = I + END IF + 10 CONTINUE + ITEMP = ITEMP - 1 +* +* Compute the QR factorization and update remaining columns +* + IF( ITEMP.GT.0 ) THEN + MA = MIN( ITEMP, M ) + CALL CGEQR2( M, MA, A, LDA, TAU, WORK, INFO ) + IF( MA.LT.N ) THEN + CALL CUNM2R( 'Left', 'Conjugate transpose', M, N-MA, MA, A, + $ LDA, TAU, A( 1, MA+1 ), LDA, WORK, INFO ) + END IF + END IF +* + IF( ITEMP.LT.MN ) THEN +* +* Initialize partial column norms. The first n elements of +* work store the exact column norms. 
+* + DO 20 I = ITEMP + 1, N + RWORK( I ) = SCNRM2( M-ITEMP, A( ITEMP+1, I ), 1 ) + RWORK( N+I ) = RWORK( I ) + 20 CONTINUE +* +* Compute factorization +* + DO 40 I = ITEMP + 1, MN +* +* Determine ith pivot column and swap if necessary +* + PVT = ( I-1 ) + ISAMAX( N-I+1, RWORK( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL CSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + RWORK( PVT ) = RWORK( I ) + RWORK( N+PVT ) = RWORK( N+I ) + END IF +* +* Generate elementary reflector H(i) +* + AII = A( I, I ) + CALL CLARFG( M-I+1, AII, A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + A( I, I ) = AII +* + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = CMPLX( ONE ) + CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = AII + END IF +* +* Update partial column norms +* + DO 30 J = I + 1, N + IF( RWORK( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ABS( A( I, J ) ) / RWORK( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = TEMP*( RWORK( J ) / RWORK( N+J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + IF( M-I.GT.0 ) THEN + RWORK( J ) = SCNRM2( M-I, A( I+1, J ), 1 ) + RWORK( N+J ) = RWORK( J ) + ELSE + RWORK( J ) = ZERO + RWORK( N+J ) = ZERO + END IF + ELSE + RWORK( J ) = RWORK( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE +* + 40 CONTINUE + END IF + RETURN +* +* End of CGEQPF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgeqr2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgeqr2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,121 @@ + SUBROUTINE CGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. 
+ COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEQR2 computes a QR factorization of a complex m by n matrix A: +* A = Q * R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(m,n) by n upper trapezoidal matrix R (R is +* upper triangular if m >= n); the elements below the diagonal, +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQR2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL CLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i)' to A(i:m,i+1:n) from the left +* + ALPHA = A( I, I ) + A( I, I ) = ONE + CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = ALPHA + END IF + 10 CONTINUE + RETURN +* +* End of CGEQR2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgeqrf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgeqrf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,196 @@ + SUBROUTINE CGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEQRF computes a QR factorization of a complex M-by-N matrix A: +* A = Q * R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(M,N)-by-N upper trapezoidal matrix R (R is +* upper triangular if m >= n); the elements below the diagonal, +* with the array TAU, represent the unitary matrix Q as a +* product of min(m,n) elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. 
LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CGEQR2, CLARFB, CLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. 
.NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL CGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(i:m,i+ib:n) from the left +* + CALL CLARFB( 'Left', 'Conjugate transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. 
+* + IF( I.LE.K ) + $ CALL CGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of CGEQRF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgesv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgesv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,107 @@ + SUBROUTINE CGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CGESV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +* +* The LU decomposition with partial pivoting and row interchanges is +* used to factor A as +* A = P * L * U, +* where P is a permutation matrix, L is unit lower triangular, and U is +* upper triangular. The factored form of A is then used to solve the +* system of equations A * X = B. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the N-by-N coefficient matrix A. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices that define the permutation matrix P; +* row i of the matrix was interchanged with row IPIV(i). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS matrix of right hand side matrix B. 
+* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, so the solution could not be computed. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL CGETRF, CGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGESV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of A. +* + CALL CGETRF( N, N, A, LDA, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) + END IF + RETURN +* +* End of CGESV +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgesvd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgesvd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,3602 @@ + SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), S( * ) + COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. 
+* +* Purpose +* ======= +* +* CGESVD computes the singular value decomposition (SVD) of a complex +* M-by-N matrix A, optionally computing the left and/or right singular +* vectors. The SVD is written +* +* A = U * SIGMA * conjugate-transpose(V) +* +* where SIGMA is an M-by-N matrix which is zero except for its +* min(m,n) diagonal elements, U is an M-by-M unitary matrix, and +* V is an N-by-N unitary matrix. The diagonal elements of SIGMA +* are the singular values of A; they are real and non-negative, and +* are returned in descending order. The first min(m,n) columns of +* U and V are the left and right singular vectors of A. +* +* Note that the routine returns V**H, not V. +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix U: +* = 'A': all M columns of U are returned in array U: +* = 'S': the first min(m,n) columns of U (the left singular +* vectors) are returned in the array U; +* = 'O': the first min(m,n) columns of U (the left singular +* vectors) are overwritten on the array A; +* = 'N': no columns of U (no left singular vectors) are +* computed. +* +* JOBVT (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix +* V**H: +* = 'A': all N rows of V**H are returned in the array VT; +* = 'S': the first min(m,n) rows of V**H (the right singular +* vectors) are returned in the array VT; +* = 'O': the first min(m,n) rows of V**H (the right singular +* vectors) are overwritten on the array A; +* = 'N': no rows of V**H (no right singular vectors) are +* computed. +* +* JOBVT and JOBU cannot both be 'O'. +* +* M (input) INTEGER +* The number of rows of the input matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the input matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. 
+* On exit, +* if JOBU = 'O', A is overwritten with the first min(m,n) +* columns of U (the left singular vectors, +* stored columnwise); +* if JOBVT = 'O', A is overwritten with the first min(m,n) +* rows of V**H (the right singular vectors, +* stored rowwise); +* if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A +* are destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* S (output) REAL array, dimension (min(M,N)) +* The singular values of A, sorted so that S(i) >= S(i+1). +* +* U (output) COMPLEX array, dimension (LDU,UCOL) +* (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. +* If JOBU = 'A', U contains the M-by-M unitary matrix U; +* if JOBU = 'S', U contains the first min(m,n) columns of U +* (the left singular vectors, stored columnwise); +* if JOBU = 'N' or 'O', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= 1; if +* JOBU = 'S' or 'A', LDU >= M. +* +* VT (output) COMPLEX array, dimension (LDVT,N) +* If JOBVT = 'A', VT contains the N-by-N unitary matrix +* V**H; +* if JOBVT = 'S', VT contains the first min(m,n) rows of +* V**H (the right singular vectors, stored rowwise); +* if JOBVT = 'N' or 'O', VT is not referenced. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= 1; if +* JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)). +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. 
+* +* RWORK (workspace) REAL array, dimension (5*min(M,N)) +* On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the +* unconverged superdiagonal elements of an upper bidiagonal +* matrix B whose diagonal is in S (not necessarily sorted). +* B satisfies A = U * B * VT, so it has the same singular +* values as A, and singular vectors related by U and VT. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if CBDSQR did not converge, INFO specifies how many +* superdiagonals of an intermediate bidiagonal form B +* did not converge to zero. See the description of RWORK +* above for details. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS, + $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS + INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL, + $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU, + $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU, + $ NRVT, WRKBL + REAL ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) + COMPLEX CDUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CBDSQR, CGEBRD, CGELQF, CGEMM, CGEQRF, CLACPY, + $ CLASCL, CLASET, CUNGBR, CUNGLQ, CUNGQR, CUNMBR, + $ SLASCL, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + WNTUA = LSAME( JOBU, 'A' ) + WNTUS = LSAME( JOBU, 'S' ) + WNTUAS = WNTUA .OR. WNTUS + WNTUO = LSAME( JOBU, 'O' ) + WNTUN = LSAME( JOBU, 'N' ) + WNTVA = LSAME( JOBVT, 'A' ) + WNTVS = LSAME( JOBVT, 'S' ) + WNTVAS = WNTVA .OR. 
WNTVS + WNTVO = LSAME( JOBVT, 'O' ) + WNTVN = LSAME( JOBVT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR. + $ ( WNTVO .AND. WNTUO ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN + INFO = -9 + ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR. + $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to +* real workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( M.GE.N .AND. MINMN.GT.0 ) THEN +* +* Space needed for CBDSQR is BDSPAC = 5*N +* + MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) + IF( M.GE.MNTHR ) THEN + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* + MAXWRK = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + IF( WNTVO .OR. WNTVAS ) + $ MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MINWRK = 3*N + ELSE IF( WNTUO .AND. 
WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N ) + MINWRK = 2*N + M + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N ) + MINWRK = 2*N + M + ELSE IF( WNTUS .AND. WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUS .AND. WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = 2*N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUS .AND. 
WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUA .AND. WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUA .AND. WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = 2*N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUA .AND. 
WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + END IF + ELSE +* +* Path 10 (M at least N, but not much larger) +* + MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N, + $ -1, -1 ) + IF( WNTUS .OR. WNTUO ) + $ MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', M, N, N, -1 ) ) + IF( WNTUA ) + $ MAXWRK = MAX( MAXWRK, 2*N+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, N, -1 ) ) + IF( .NOT.WNTVN ) + $ MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MINWRK = 2*N + M + END IF + ELSE IF( MINMN.GT.0 ) THEN +* +* Space needed for CBDSQR is BDSPAC = 5*M +* + MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) + IF( N.GE.MNTHR ) THEN + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* + MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + IF( WNTUO .OR. WNTUAS ) + $ MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) + MINWRK = 3*M + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N ) + MINWRK = 2*M + N + ELSE IF( WNTVO .AND. 
WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', +* JOBVT='O') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N ) + MINWRK = 2*M + N + ELSE IF( WNTVS .AND. WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVS .AND. WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = 2*M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVS .AND. WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVA .AND. 
WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVA .AND. WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = 2*M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVA .AND. WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + END IF + ELSE +* +* Path 10t(N greater than M, but not much larger) +* + MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N, + $ -1, -1 ) + IF( WNTVS .OR. WNTVO ) + $ MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'P', M, N, M, -1 ) ) + IF( WNTVA ) + $ MAXWRK = MAX( MAXWRK, 2*M+N* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, M, -1 ) ) + IF( .NOT.WNTUN ) + $ MAXWRK = MAX( MAXWRK, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) + MINWRK = 2*M + N + END IF + END IF + MAXWRK = MAX( MINWRK, MAXWRK ) + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. 
.NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGESVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* No left singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: need 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out below R +* + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + NCVT = 0 + IF( WNTVO .OR. WNTVAS ) THEN +* +* If right singular vectors desired, generate P'. 
+* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + NCVT = N + END IF + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A if desired +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, NCVT, 0, 0, S, RWORK( IE ), A, LDA, + $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO ) +* +* If right singular vectors desired in VT, copy them there +* + IF( WNTVAS ) + $ CALL CLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* N left singular vectors to be overwritten on A and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N, WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR) and zero out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL 
CGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: need 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, 1, + $ WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (CWorkspace: need N*N+N, prefer N*N+M*N) +* (RWorkspace: 0) +* + DO 10 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, CZERO, + $ WORK( IU ), LDWRKU ) + CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* (RWorkspace: N) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, 1, + $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO .AND. 
WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N and WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT, copying result to WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, 
N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) and computing right +* singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (CWorkspace: need N*N+N, prefer N*N+M*N) +* (RWorkspace: 0) +* + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, CZERO, + $ WORK( IU ), LDWRKU ) + CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) +* +* Generate Q in A +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: N) +* + CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in A by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right 
vectors bidiagonalizing R in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUS ) THEN +* + IF( WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* N left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, 
N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, + $ 1, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IR ), LDWRKR, CZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, + $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) 
THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* N left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*N*N+3*N, +* prefer 2*N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 
2*N*N+3*N-1, +* prefer 2*N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (CWorkspace: need 2*N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IU ), LDWRKU, CZERO, U, LDU ) +* +* Copy right singular vectors of R to A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ 
LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' +* or 'A') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors 
in WORK(IU) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need N*N+3*N-1, +* prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IU ), LDWRKU, CZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT 
+* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTUA ) THEN +* + IF( WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* M left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in U +* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + 
CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, + $ 1, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IR), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IR ), LDWRKR, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 
'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, + $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* M left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*N*N+3*N, +* prefer 2*N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ 
), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*N*N+3*N-1, +* prefer 2*N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (CWorkspace: need 2*N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IU ), LDWRKU, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) +* +* Copy right singular vectors of R from WORK(IR) to A +* + CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero 
out below R in A +* + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' +* or 'A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to 
WORK(IU), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need N*N+3*N-1, +* prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: need 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IU ), LDWRKU, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, 
LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R from A to VT, zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* M .LT. 
MNTHR +* +* Path 10 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB) +* (RWorkspace: 0) +* + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) + IF( WNTUS ) + $ NCU = N + IF( WNTUA ) + $ NCU = M + CALL CUNGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + N + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. 
( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* No right singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out above L +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUO .OR. 
WNTUAS ) THEN +* +* If left singular vectors desired, generate Q +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + M + NRU = 0 + IF( WNTUO .OR. WNTUAS ) + $ NRU = M +* +* Perform bidiagonal QR iteration, computing left singular +* vectors of A in A if desired +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, 0, NRU, 0, S, RWORK( IE ), CDUM, 1, + $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO ) +* +* If left singular vectors desired in U, copy them there +* + IF( WNTUAS ) + $ CALL CLACPY( 'F', M, M, A, LDA, U, LDU ) +* + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* M right singular vectors to be overwritten on A and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR) and zero out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + 
IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L +* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (CWorkspace: need M*M+M, prefer M*M+M*N) +* (RWorkspace: 0) +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, CZERO, + $ WORK( IU ), LDWRKU ) + CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'L', M, N, 0, 0, S, RWORK( IE ), A, LDA, + $ CDUM, 1, 
CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing about above it +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U, copying result to WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR ) +* +* Generate right vectors bidiagonalizing L in WORK(IR) +* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (CWorkspace: 
need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U, and computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (CWorkspace: need M*M+M, prefer M*M+M*N)) +* (RWorkspace: 0) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, CZERO, + $ WORK( IU ), LDWRKU ) + CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in A +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), A, LDA, WORK( 
IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), A, LDA, + $ U, LDU, CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVS ) THEN +* + IF( WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* M right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L in +* WORK(IR) +* (CWorkspace: need M*M+3*M, prefer 
M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ), + $ LDWRKR, A, LDA, CZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy result to VT +* + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT, + 
$ LDVT, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out below it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*M*M+3*M, +* prefer 2*M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*M*M+3*M-1, +* prefer 2*M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ 
LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (CWorkspace: need 2*M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, A, LDA, CZERO, VT, LDVT ) +* +* Copy left singular vectors of L to A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 
0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors of L in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is LDA by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR 
) + CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need M*M+3*M-1, +* prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, A, LDA, CZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ U( 1, 2 ), LDU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), 
LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTVA ) THEN +* + IF( WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* N right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in VT +* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in 
WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need M*M+3*M-1, +* prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ), + $ LDWRKR, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right 
bidiagonalizing vectors in A by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT, + $ LDVT, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* 
(CWorkspace: need 2*M*M+3*M, +* prefer 2*M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*M*M+3*M-1, +* prefer 2*M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (CWorkspace: need 2*M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* +* Copy left singular vectors of A from WORK(IR) to A +* + CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) 
+* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by M +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is M by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* 
(CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* 
(CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ U( 1, 2 ), LDU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N .LT. 
MNTHR +* +* Path 10t(N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* (RWorkspace: M) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB) +* (RWorkspace: 0) +* + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) + IF( WNTVA ) + $ NRVT = N + IF( WNTVS ) + $ NRVT = M + CALL CUNGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + M + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. 
( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + END IF +* + END IF +* + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, + $ RWORK( IE ), MINMN, IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, + $ RWORK( IE ), MINMN, IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of CGESVD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgetf2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgetf2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,148 @@ + SUBROUTINE CGETF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. 
Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CGETF2 computes an LU factorization of a general m-by-n matrix A +* using partial pivoting with row interchanges. +* +* The factorization has the form +* A = P * L * U +* where P is a permutation matrix, L is lower triangular with unit +* diagonal elements (lower trapezoidal if m > n), and U is upper +* triangular (upper trapezoidal if m < n). +* +* This is the right-looking Level 2 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the m by n matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, U(k,k) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + REAL SFMIN + INTEGER I, J, JP +* .. +* .. External Functions .. + REAL SLAMCH + INTEGER ICAMAX + EXTERNAL SLAMCH, ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CGERU, CSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. 
+ INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Compute machine safe minimum +* + SFMIN = SLAMCH('S') +* + DO 10 J = 1, MIN( M, N ) +* +* Find pivot and test for singularity. +* + JP = J - 1 + ICAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + IF( A( JP, J ).NE.ZERO ) THEN +* +* Apply the interchange to columns 1:N. +* + IF( JP.NE.J ) + $ CALL CSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA ) +* +* Compute elements J+1:M of J-th column. +* + IF( J.LT.M ) THEN + IF( ABS(A( J, J )) .GE. SFMIN ) THEN + CALL CSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) + ELSE + DO 20 I = 1, M-J + A( J+I, J ) = A( J+I, J ) / A( J, J ) + 20 CONTINUE + END IF + END IF +* + ELSE IF( INFO.EQ.0 ) THEN +* + INFO = J + END IF +* + IF( J.LT.MIN( M, N ) ) THEN +* +* Update trailing submatrix. +* + CALL CGERU( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), + $ LDA, A( J+1, J+1 ), LDA ) + END IF + 10 CONTINUE + RETURN +* +* End of CGETF2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgetrf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgetrf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,159 @@ + SUBROUTINE CGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CGETRF computes an LU factorization of a general M-by-N matrix A +* using partial pivoting with row interchanges. 
+* +* The factorization has the form +* A = P * L * U +* where P is a permutation matrix, L is lower triangular with unit +* diagonal elements (lower trapezoidal if m > n), and U is upper +* triangular (upper trapezoidal if m < n). +* +* This is the right-looking Level 3 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGETF2, CLASWP, CTRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. 
+* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL CGETF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL CGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to columns 1:J-1. +* + CALL CLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF( J+JB.LE.N ) THEN +* +* Apply interchanges to columns J+JB:N. +* + CALL CLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* +* Compute block row of U. +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, + $ N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ), + $ LDA ) + IF( J+JB.LE.M ) THEN +* +* Update trailing submatrix. +* + CALL CGEMM( 'No transpose', 'No transpose', M-J-JB+1, + $ N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA, + $ A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ), + $ LDA ) + END IF + END IF + 20 CONTINUE + END IF + RETURN +* +* End of CGETRF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgetri.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgetri.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,193 @@ + SUBROUTINE CGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. 
Scalar Arguments .. + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGETRI computes the inverse of a matrix using the LU factorization +* computed by CGETRF. +* +* This method inverts U and then computes inv(A) by solving the system +* inv(A)*L = inv(U) for inv(A). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the factors L and U from the factorization +* A = P*L*U as computed by CGETRF. +* On exit, if INFO = 0, the inverse of the original matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from CGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimal performance LWORK >= N*NB, where NB is +* the optimal blocksize returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is +* singular and its inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. 
+ LOGICAL LQUERY + INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, + $ NBMIN, NN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGEMV, CSWAP, CTRSM, CTRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'CGETRI', ' ', N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETRI', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form inv(U). If INFO > 0 from CTRTRI, then U is singular, +* and the inverse is not computed. +* + CALL CTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = MAX( LDWORK*NB, 1 ) + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGETRI', ' ', N, -1, -1, -1 ) ) + END IF + ELSE + IWS = N + END IF +* +* Solve the equation inv(A)*L = inv(U) for inv(A). +* + IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + DO 20 J = N, 1, -1 +* +* Copy current column of L to WORK and replace with zeros. +* + DO 10 I = J + 1, N + WORK( I ) = A( I, J ) + A( I, J ) = ZERO + 10 CONTINUE +* +* Compute current column of inv(A). +* + IF( J.LT.N ) + $ CALL CGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), + $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) + 20 CONTINUE + ELSE +* +* Use blocked code. +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 50 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) +* +* Copy current block column of L to WORK and replace with +* zeros. 
+* + DO 40 JJ = J, J + JB - 1 + DO 30 I = JJ + 1, N + WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) + A( I, JJ ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Compute current block column of inv(A). +* + IF( J+JB.LE.N ) + $ CALL CGEMM( 'No transpose', 'No transpose', N, JB, + $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, + $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) + CALL CTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, + $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) + 50 CONTINUE + END IF +* +* Apply column interchanges. +* + DO 60 J = N - 1, 1, -1 + JP = IPIV( J ) + IF( JP.NE.J ) + $ CALL CSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) + 60 CONTINUE +* + WORK( 1 ) = IWS + RETURN +* +* End of CGETRI +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgetrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgetrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,149 @@ + SUBROUTINE CGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CGETRS solves a system of linear equations +* A * X = B, A**T * X = B, or A**H * X = B +* with a general N-by-N matrix A using the LU factorization computed +* by CGETRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by CGETRF. 
+* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from CGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASWP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( NOTRAN ) THEN +* +* Solve A * X = B. +* +* Apply row interchanges to the right hand sides. +* + CALL CLASWP( NRHS, B, LDB, 1, N, IPIV, 1 ) +* +* Solve L*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A**T * X = B or A**H * X = B. 
+* +* Solve U'*X = B, overwriting B with X. + CALL CTRSM( 'Left', 'Upper', TRANS, 'Non-unit', N, NRHS, ONE, + $ A, LDA, B, LDB ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Lower', TRANS, 'Unit', N, NRHS, ONE, A, + $ LDA, B, LDB ) +* +* Apply row interchanges to the solution vectors. +* + CALL CLASWP( NRHS, B, LDB, 1, N, IPIV, -1 ) + END IF +* + RETURN +* +* End of CGETRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cggbal.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cggbal.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,482 @@ + SUBROUTINE CGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, + $ RSCALE, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, LDB, N +* .. +* .. Array Arguments .. + REAL LSCALE( * ), RSCALE( * ), WORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CGGBAL balances a pair of general complex matrices (A,B). This +* involves, first, permuting A and B by similarity transformations to +* isolate eigenvalues in the first 1 to ILO-1 and last IHI+1 to N +* elements on the diagonal; and second, applying a diagonal similarity +* transformation to rows and columns ILO to IHI to make the rows +* and columns as close in norm as possible. Both steps are optional. +* +* Balancing may reduce the 1-norm of the matrices, and improve the +* accuracy of the computed eigenvalues and/or eigenvectors in the +* generalized eigenvalue problem A*x = lambda*B*x. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the operations to be performed on A and B: +* = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 +* and RSCALE(I) = 1.0 for i=1,...,N; +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* +* N (input) INTEGER +* The order of the matrices A and B.
N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the input matrix A. +* On exit, A is overwritten by the balanced matrix. +* If JOB = 'N', A is not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB,N) +* On entry, the input matrix B. +* On exit, B is overwritten by the balanced matrix. +* If JOB = 'N', B is not referenced. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ILO (output) INTEGER +* IHI (output) INTEGER +* ILO and IHI are set to integers such that on exit +* A(i,j) = 0 and B(i,j) = 0 if i > j and +* j = 1,...,ILO-1 or i = IHI+1,...,N. +* If JOB = 'N' or 'S', ILO = 1 and IHI = N. +* +* LSCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied +* to the left side of A and B. If P(j) is the index of the +* row interchanged with row j, and D(j) is the scaling factor +* applied to row j, then +* LSCALE(j) = P(j) for J = 1,...,ILO-1 +* = D(j) for J = ILO,...,IHI +* = P(j) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* RSCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied +* to the right side of A and B. If P(j) is the index of the +* column interchanged with column j, and D(j) is the scaling +* factor applied to column j, then +* RSCALE(j) = P(j) for J = 1,...,ILO-1 +* = D(j) for J = ILO,...,IHI +* = P(j) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* WORK (workspace) REAL array, dimension (lwork) +* lwork must be at least max(1,6*N) when JOB = 'S' or 'B', and +* at least 1 when JOB = 'N' or 'P'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* See R.C. 
WARD, Balancing the generalized eigenvalue problem, +* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) + REAL THREE, SCLFAC + PARAMETER ( THREE = 3.0E+0, SCLFAC = 1.0E+1 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ICAB, IFLOW, IP1, IR, IRAB, IT, J, JC, JP1, + $ K, KOUNT, L, LCAB, LM1, LRAB, LSFMAX, LSFMIN, + $ M, NR, NRP2 + REAL ALPHA, BASL, BETA, CAB, CMAX, COEF, COEF2, + $ COEF5, COR, EW, EWC, GAMMA, PGAMMA, RAB, SFMAX, + $ SFMIN, SUM, T, TA, TB, TC + COMPLEX CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SDOT, SLAMCH + EXTERNAL LSAME, ICAMAX, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, CSWAP, SAXPY, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, INT, LOG10, MAX, MIN, REAL, SIGN +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. 
.NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGBAL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + ILO = 1 + IHI = N + RETURN + END IF +* + IF( N.EQ.1 ) THEN + ILO = 1 + IHI = N + LSCALE( 1 ) = ONE + RSCALE( 1 ) = ONE + RETURN + END IF +* + IF( LSAME( JOB, 'N' ) ) THEN + ILO = 1 + IHI = N + DO 10 I = 1, N + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 10 CONTINUE + RETURN + END IF +* + K = 1 + L = N + IF( LSAME( JOB, 'S' ) ) + $ GO TO 190 +* + GO TO 30 +* +* Permute the matrices A and B to isolate the eigenvalues. +* +* Find row with one nonzero in columns 1 through L +* + 20 CONTINUE + L = LM1 + IF( L.NE.1 ) + $ GO TO 30 +* + RSCALE( 1 ) = ONE + LSCALE( 1 ) = ONE + GO TO 190 +* + 30 CONTINUE + LM1 = L - 1 + DO 80 I = L, 1, -1 + DO 40 J = 1, LM1 + JP1 = J + 1 + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 50 + 40 CONTINUE + J = L + GO TO 70 +* + 50 CONTINUE + DO 60 J = JP1, L + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 80 + 60 CONTINUE + J = JP1 - 1 +* + 70 CONTINUE + M = L + IFLOW = 1 + GO TO 160 + 80 CONTINUE + GO TO 100 +* +* Find column with one nonzero in rows K through N +* + 90 CONTINUE + K = K + 1 +* + 100 CONTINUE + DO 150 J = K, L + DO 110 I = K, LM1 + IP1 = I + 1 + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 120 + 110 CONTINUE + I = L + GO TO 140 + 120 CONTINUE + DO 130 I = IP1, L + IF( A( I, J ).NE.CZERO .OR. 
B( I, J ).NE.CZERO ) + $ GO TO 150 + 130 CONTINUE + I = IP1 - 1 + 140 CONTINUE + M = K + IFLOW = 2 + GO TO 160 + 150 CONTINUE + GO TO 190 +* +* Permute rows M and I +* + 160 CONTINUE + LSCALE( M ) = I + IF( I.EQ.M ) + $ GO TO 170 + CALL CSWAP( N-K+1, A( I, K ), LDA, A( M, K ), LDA ) + CALL CSWAP( N-K+1, B( I, K ), LDB, B( M, K ), LDB ) +* +* Permute columns M and J +* + 170 CONTINUE + RSCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 180 + CALL CSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL CSWAP( L, B( 1, J ), 1, B( 1, M ), 1 ) +* + 180 CONTINUE + GO TO ( 20, 90 )IFLOW +* + 190 CONTINUE + ILO = K + IHI = L +* + IF( LSAME( JOB, 'P' ) ) THEN + DO 195 I = ILO, IHI + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 195 CONTINUE + RETURN + END IF +* + IF( ILO.EQ.IHI ) + $ RETURN +* +* Balance the submatrix in rows ILO to IHI. +* + NR = IHI - ILO + 1 + DO 200 I = ILO, IHI + RSCALE( I ) = ZERO + LSCALE( I ) = ZERO +* + WORK( I ) = ZERO + WORK( I+N ) = ZERO + WORK( I+2*N ) = ZERO + WORK( I+3*N ) = ZERO + WORK( I+4*N ) = ZERO + WORK( I+5*N ) = ZERO + 200 CONTINUE +* +* Compute right side vector in resulting linear equations +* + BASL = LOG10( SCLFAC ) + DO 240 I = ILO, IHI + DO 230 J = ILO, IHI + IF( A( I, J ).EQ.CZERO ) THEN + TA = ZERO + GO TO 210 + END IF + TA = LOG10( CABS1( A( I, J ) ) ) / BASL +* + 210 CONTINUE + IF( B( I, J ).EQ.CZERO ) THEN + TB = ZERO + GO TO 220 + END IF + TB = LOG10( CABS1( B( I, J ) ) ) / BASL +* + 220 CONTINUE + WORK( I+4*N ) = WORK( I+4*N ) - TA - TB + WORK( J+5*N ) = WORK( J+5*N ) - TA - TB + 230 CONTINUE + 240 CONTINUE +* + COEF = ONE / REAL( 2*NR ) + COEF2 = COEF*COEF + COEF5 = HALF*COEF2 + NRP2 = NR + 2 + BETA = ZERO + IT = 1 +* +* Start generalized conjugate gradient iteration +* + 250 CONTINUE +* + GAMMA = SDOT( NR, WORK( ILO+4*N ), 1, WORK( ILO+4*N ), 1 ) + + $ SDOT( NR, WORK( ILO+5*N ), 1, WORK( ILO+5*N ), 1 ) +* + EW = ZERO + EWC = ZERO + DO 260 I = ILO, IHI + EW = EW + WORK( I+4*N ) + EWC = EWC + WORK( I+5*N ) + 260 CONTINUE +* + GAMMA = COEF*GAMMA - 
COEF2*( EW**2+EWC**2 ) - COEF5*( EW-EWC )**2 + IF( GAMMA.EQ.ZERO ) + $ GO TO 350 + IF( IT.NE.1 ) + $ BETA = GAMMA / PGAMMA + T = COEF5*( EWC-THREE*EW ) + TC = COEF5*( EW-THREE*EWC ) +* + CALL SSCAL( NR, BETA, WORK( ILO ), 1 ) + CALL SSCAL( NR, BETA, WORK( ILO+N ), 1 ) +* + CALL SAXPY( NR, COEF, WORK( ILO+4*N ), 1, WORK( ILO+N ), 1 ) + CALL SAXPY( NR, COEF, WORK( ILO+5*N ), 1, WORK( ILO ), 1 ) +* + DO 270 I = ILO, IHI + WORK( I ) = WORK( I ) + TC + WORK( I+N ) = WORK( I+N ) + T + 270 CONTINUE +* +* Apply matrix to vector +* + DO 300 I = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 290 J = ILO, IHI + IF( A( I, J ).EQ.CZERO ) + $ GO TO 280 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 280 CONTINUE + IF( B( I, J ).EQ.CZERO ) + $ GO TO 290 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 290 CONTINUE + WORK( I+2*N ) = REAL( KOUNT )*WORK( I+N ) + SUM + 300 CONTINUE +* + DO 330 J = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 320 I = ILO, IHI + IF( A( I, J ).EQ.CZERO ) + $ GO TO 310 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 310 CONTINUE + IF( B( I, J ).EQ.CZERO ) + $ GO TO 320 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 320 CONTINUE + WORK( J+3*N ) = REAL( KOUNT )*WORK( J ) + SUM + 330 CONTINUE +* + SUM = SDOT( NR, WORK( ILO+N ), 1, WORK( ILO+2*N ), 1 ) + + $ SDOT( NR, WORK( ILO ), 1, WORK( ILO+3*N ), 1 ) + ALPHA = GAMMA / SUM +* +* Determine correction to current iteration +* + CMAX = ZERO + DO 340 I = ILO, IHI + COR = ALPHA*WORK( I+N ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + LSCALE( I ) = LSCALE( I ) + COR + COR = ALPHA*WORK( I ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + RSCALE( I ) = RSCALE( I ) + COR + 340 CONTINUE + IF( CMAX.LT.HALF ) + $ GO TO 350 +* + CALL SAXPY( NR, -ALPHA, WORK( ILO+2*N ), 1, WORK( ILO+4*N ), 1 ) + CALL SAXPY( NR, -ALPHA, WORK( ILO+3*N ), 1, WORK( ILO+5*N ), 1 ) +* + PGAMMA = GAMMA + IT = IT + 1 + IF( IT.LE.NRP2 ) + $ GO TO 250 +* +* End generalized conjugate gradient iteration +* + 350 CONTINUE + SFMIN = SLAMCH( 'S' ) + SFMAX = 
ONE / SFMIN + LSFMIN = INT( LOG10( SFMIN ) / BASL+ONE ) + LSFMAX = INT( LOG10( SFMAX ) / BASL ) + DO 360 I = ILO, IHI + IRAB = ICAMAX( N-ILO+1, A( I, ILO ), LDA ) + RAB = ABS( A( I, IRAB+ILO-1 ) ) + IRAB = ICAMAX( N-ILO+1, B( I, ILO ), LDB ) + RAB = MAX( RAB, ABS( B( I, IRAB+ILO-1 ) ) ) + LRAB = INT( LOG10( RAB+SFMIN ) / BASL+ONE ) + IR = LSCALE( I ) + SIGN( HALF, LSCALE( I ) ) + IR = MIN( MAX( IR, LSFMIN ), LSFMAX, LSFMAX-LRAB ) + LSCALE( I ) = SCLFAC**IR + ICAB = ICAMAX( IHI, A( 1, I ), 1 ) + CAB = ABS( A( ICAB, I ) ) + ICAB = ICAMAX( IHI, B( 1, I ), 1 ) + CAB = MAX( CAB, ABS( B( ICAB, I ) ) ) + LCAB = INT( LOG10( CAB+SFMIN ) / BASL+ONE ) + JC = RSCALE( I ) + SIGN( HALF, RSCALE( I ) ) + JC = MIN( MAX( JC, LSFMIN ), LSFMAX, LSFMAX-LCAB ) + RSCALE( I ) = SCLFAC**JC + 360 CONTINUE +* +* Row scaling of matrices A and B +* + DO 370 I = ILO, IHI + CALL CSSCAL( N-ILO+1, LSCALE( I ), A( I, ILO ), LDA ) + CALL CSSCAL( N-ILO+1, LSCALE( I ), B( I, ILO ), LDB ) + 370 CONTINUE +* +* Column scaling of matrices A and B +* + DO 380 J = ILO, IHI + CALL CSSCAL( IHI, RSCALE( J ), A( 1, J ), 1 ) + CALL CSSCAL( IHI, RSCALE( J ), B( 1, J ), 1 ) + 380 CONTINUE +* + RETURN +* +* End of CGGBAL +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgtsv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgtsv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,173 @@ + SUBROUTINE CGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ) +* .. +* +* Purpose +* ======= +* +* CGTSV solves the equation +* +* A*X = B, +* +* where A is an N-by-N tridiagonal matrix, by Gaussian elimination with +* partial pivoting. +* +* Note that the equation A'*X = B may be solved by interchanging the +* order of the arguments DU and DL. 
+* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input/output) COMPLEX array, dimension (N-1) +* On entry, DL must contain the (n-1) subdiagonal elements of +* A. +* On exit, DL is overwritten by the (n-2) elements of the +* second superdiagonal of the upper triangular matrix U from +* the LU factorization of A, in DL(1), ..., DL(n-2). +* +* D (input/output) COMPLEX array, dimension (N) +* On entry, D must contain the diagonal elements of A. +* On exit, D is overwritten by the n diagonal elements of U. +* +* DU (input/output) COMPLEX array, dimension (N-1) +* On entry, DU must contain the (n-1) superdiagonal elements +* of A. +* On exit, DU is overwritten by the (n-1) elements of the first +* superdiagonal of U. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero, and the solution +* has not been computed. The factorization has not been +* completed unless i = N. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER J, K + COMPLEX MULT, TEMP, ZDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. 
+* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGTSV ', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + DO 30 K = 1, N - 1 + IF( DL( K ).EQ.ZERO ) THEN +* +* Subdiagonal is zero, no elimination is required. +* + IF( D( K ).EQ.ZERO ) THEN +* +* Diagonal is zero: set INFO = K and return; a unique +* solution can not be found. +* + INFO = K + RETURN + END IF + ELSE IF( CABS1( D( K ) ).GE.CABS1( DL( K ) ) ) THEN +* +* No row interchange required +* + MULT = DL( K ) / D( K ) + D( K+1 ) = D( K+1 ) - MULT*DU( K ) + DO 10 J = 1, NRHS + B( K+1, J ) = B( K+1, J ) - MULT*B( K, J ) + 10 CONTINUE + IF( K.LT.( N-1 ) ) + $ DL( K ) = ZERO + ELSE +* +* Interchange rows K and K+1 +* + MULT = D( K ) / DL( K ) + D( K ) = DL( K ) + TEMP = D( K+1 ) + D( K+1 ) = DU( K ) - MULT*TEMP + IF( K.LT.( N-1 ) ) THEN + DL( K ) = DU( K+1 ) + DU( K+1 ) = -MULT*DL( K ) + END IF + DU( K ) = TEMP + DO 20 J = 1, NRHS + TEMP = B( K, J ) + B( K, J ) = B( K+1, J ) + B( K+1, J ) = TEMP - MULT*B( K+1, J ) + 20 CONTINUE + END IF + 30 CONTINUE + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF +* +* Back solve with the matrix U from the factorization. +* + DO 50 J = 1, NRHS + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / D( N-1 ) + DO 40 K = N - 2, 1, -1 + B( K, J ) = ( B( K, J )-DU( K )*B( K+1, J )-DL( K )* + $ B( K+2, J ) ) / D( K ) + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of CGTSV +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgttrf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgttrf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,174 @@ + SUBROUTINE CGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. 
+ INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* CGTTRF computes an LU factorization of a complex tridiagonal matrix A +* using elimination with partial pivoting and row interchanges. +* +* The factorization has the form +* A = L * U +* where L is a product of permutation and unit lower bidiagonal +* matrices and U is upper triangular with nonzeros in only the main +* diagonal and first two superdiagonals. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* DL (input/output) COMPLEX array, dimension (N-1) +* On entry, DL must contain the (n-1) sub-diagonal elements of +* A. +* +* On exit, DL is overwritten by the (n-1) multipliers that +* define the matrix L from the LU factorization of A. +* +* D (input/output) COMPLEX array, dimension (N) +* On entry, D must contain the diagonal elements of A. +* +* On exit, D is overwritten by the n diagonal elements of the +* upper triangular matrix U from the LU factorization of A. +* +* DU (input/output) COMPLEX array, dimension (N-1) +* On entry, DU must contain the (n-1) super-diagonal elements +* of A. +* +* On exit, DU is overwritten by the (n-1) elements of the first +* super-diagonal of U. +* +* DU2 (output) COMPLEX array, dimension (N-2) +* On exit, DU2 is overwritten by the (n-2) elements of the +* second super-diagonal of U. +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, U(k,k) is exactly zero. 
The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX FACT, TEMP, ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'CGTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize IPIV(i) = i and DU2(i) = 0 +* + DO 10 I = 1, N + IPIV( I ) = I + 10 CONTINUE + DO 20 I = 1, N - 2 + DU2( I ) = ZERO + 20 CONTINUE +* + DO 30 I = 1, N - 2 + IF( CABS1( D( I ) ).GE.CABS1( DL( I ) ) ) THEN +* +* No row interchange required, eliminate DL(I) +* + IF( CABS1( D( I ) ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE +* +* Interchange rows I and I+1, eliminate DL(I) +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + DU2( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DU( I+1 ) + IPIV( I ) = I + 1 + END IF + 30 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( CABS1( D( I ) ).GE.CABS1( DL( I ) ) ) THEN + IF( CABS1( D( I ) ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + IPIV( I ) = I + 1 + END IF + END IF +* +* Check for a zero on the diagonal of U. 
+* + DO 40 I = 1, N + IF( CABS1( D( I ) ).EQ.ZERO ) THEN + INFO = I + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of CGTTRF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgttrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgttrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,142 @@ + SUBROUTINE CGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* CGTTRS solves one of the systems of equations +* A * X = B, A**T * X = B, or A**H * X = B, +* with a tridiagonal matrix A using the LU factorization computed +* by CGTTRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) COMPLEX array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A. +* +* D (input) COMPLEX array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) COMPLEX array, dimension (N-1) +* The (n-1) elements of the first super-diagonal of U. +* +* DU2 (input) COMPLEX array, dimension (N-2) +* The (n-2) elements of the second super-diagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). 
IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the matrix of right hand side vectors B. +* On exit, B is overwritten by the solution vectors X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER ITRANS, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOTRAN = ( TRANS.EQ.'N' .OR. TRANS.EQ.'n' ) + IF( .NOT.NOTRAN .AND. .NOT.( TRANS.EQ.'T' .OR. TRANS.EQ. + $ 't' ) .AND. .NOT.( TRANS.EQ.'C' .OR. TRANS.EQ.'c' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Decode TRANS +* + IF( NOTRAN ) THEN + ITRANS = 0 + ELSE IF( TRANS.EQ.'T' .OR. TRANS.EQ.'t' ) THEN + ITRANS = 1 + ELSE + ITRANS = 2 + END IF +* +* Determine the number of right-hand sides to solve at a time. 
+* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'CGTTRS', TRANS, N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL CGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL CGTTS2( ITRANS, N, JB, DL, D, DU, DU2, IPIV, B( 1, J ), + $ LDB ) + 10 CONTINUE + END IF +* +* End of CGTTRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cgtts2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cgtts2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,271 @@ + SUBROUTINE CGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER ITRANS, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* CGTTS2 solves one of the systems of equations +* A * X = B, A**T * X = B, or A**H * X = B, +* with a tridiagonal matrix A using the LU factorization computed +* by CGTTRF. +* +* Arguments +* ========= +* +* ITRANS (input) INTEGER +* Specifies the form of the system of equations. +* = 0: A * X = B (No transpose) +* = 1: A**T * X = B (Transpose) +* = 2: A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) COMPLEX array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A. +* +* D (input) COMPLEX array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) COMPLEX array, dimension (N-1) +* The (n-1) elements of the first super-diagonal of U. 
+* +* DU2 (input) COMPLEX array, dimension (N-2) +* The (n-2) elements of the second super-diagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the matrix of right hand side vectors B. +* On exit, B is overwritten by the solution vectors X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + COMPLEX TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( ITRANS.EQ.0 ) THEN +* +* Solve A*X = B using the LU factorization of A, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 10 CONTINUE +* +* Solve L*x = b. +* + DO 20 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 20 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 30 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 30 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 10 + END IF + ELSE + DO 60 J = 1, NRHS +* +* Solve L*x = b. +* + DO 40 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 40 CONTINUE +* +* Solve U*x = b. 
+* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 50 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 50 CONTINUE + 60 CONTINUE + END IF + ELSE IF( ITRANS.EQ.1 ) THEN +* +* Solve A**T * X = B. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 70 CONTINUE +* +* Solve U**T * x = b. +* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 80 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-DU2( I-2 )* + $ B( I-2, J ) ) / D( I ) + 80 CONTINUE +* +* Solve L**T * x = b. +* + DO 90 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 90 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF + ELSE + DO 120 J = 1, NRHS +* +* Solve U**T * x = b. +* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 100 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )- + $ DU2( I-2 )*B( I-2, J ) ) / D( I ) + 100 CONTINUE +* +* Solve L**T * x = b. +* + DO 110 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 110 CONTINUE + 120 CONTINUE + END IF + ELSE +* +* Solve A**H * X = B. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 130 CONTINUE +* +* Solve U**H * x = b. +* + B( 1, J ) = B( 1, J ) / CONJG( D( 1 ) ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-CONJG( DU( 1 ) )*B( 1, J ) ) / + $ CONJG( D( 2 ) ) + DO 140 I = 3, N + B( I, J ) = ( B( I, J )-CONJG( DU( I-1 ) )*B( I-1, J )- + $ CONJG( DU2( I-2 ) )*B( I-2, J ) ) / + $ CONJG( D( I ) ) + 140 CONTINUE +* +* Solve L**H * x = b. 
+* + DO 150 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - CONJG( DL( I ) )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - CONJG( DL( I ) )*TEMP + B( I, J ) = TEMP + END IF + 150 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 130 + END IF + ELSE + DO 180 J = 1, NRHS +* +* Solve U**H * x = b. +* + B( 1, J ) = B( 1, J ) / CONJG( D( 1 ) ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-CONJG( DU( 1 ) )*B( 1, J ) ) / + $ CONJG( D( 2 ) ) + DO 160 I = 3, N + B( I, J ) = ( B( I, J )-CONJG( DU( I-1 ) )* + $ B( I-1, J )-CONJG( DU2( I-2 ) )* + $ B( I-2, J ) ) / CONJG( D( I ) ) + 160 CONTINUE +* +* Solve L**H * x = b. +* + DO 170 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - CONJG( DL( I ) )* + $ B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - CONJG( DL( I ) )*TEMP + B( I, J ) = TEMP + END IF + 170 CONTINUE + 180 CONTINUE + END IF + END IF +* +* End of CGTTS2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cheev.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cheev.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,218 @@ + SUBROUTINE CHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CHEEV computes all eigenvalues and, optionally, eigenvectors of a +* complex Hermitian matrix A. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. 
+* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* orthonormal eigenvectors of the matrix A. +* If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +* or the upper triangle (if UPLO='U') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,2*N-1). +* For optimal efficiency, LWORK >= (NB+1)*N, +* where NB is the blocksize for CHETRD returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (max(1, 3*N-2)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. 
+ LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWKOPT, NB + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANHE, SLAMCH + EXTERNAL ILAENV, LSAME, CLANHE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHETRD, CLASCL, CSTEQR, CUNGTR, SSCAL, SSTERF, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB+1 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 1 + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL CLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call CHETRD to reduce Hermitian matrix to tridiagonal form. 
+* + INDE = 1 + INDTAU = 1 + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL CHETRD( UPLO, N, A, LDA, W, RWORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* CUNGTR to generate the unitary matrix, then call CSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CUNGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + INDWRK = INDE + N + CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), A, LDA, + $ RWORK( INDWRK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHEEV +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/chetd2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/chetd2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,258 @@ + SUBROUTINE CHETD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* CHETD2 reduces a complex Hermitian matrix A to real symmetric +* tridiagonal form T by a unitary similarity transformation: +* Q' * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. 
If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the unitary +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the unitary matrix Q as a product +* of elementary reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* D (output) REAL array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) COMPLEX array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +* A(1:i-1,i+1), and tau in TAU(i). 
+* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +* and tau in TAU(i). +* +* The contents of A on exit are illustrated by the following examples +* with n = 5: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( d e v2 v3 v4 ) ( d ) +* ( d e v3 v4 ) ( e d ) +* ( d e v4 ) ( v1 e d ) +* ( d e ) ( v1 v2 e d ) +* ( d ) ( v1 v2 v3 e d ) +* +* where d and e denote diagonal and off-diagonal elements of T, and vi +* denotes an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO, HALF + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ), + $ HALF = ( 0.5E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + COMPLEX ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CHEMV, CHER2, CLARFG, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. 
.NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETD2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A +* + A( N, N ) = REAL( A( N, N ) ) + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(1:i-1,i+1) +* + ALPHA = A( I, I+1 ) + CALL CLARFG( I, ALPHA, A( 1, I+1 ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + A( I, I+1 ) = ONE +* +* Compute x := tau * A * v storing x in TAU(1:i) +* + CALL CHEMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO, + $ TAU, 1 ) +* +* Compute w := x - 1/2 * tau * (x'*v) * v +* + ALPHA = -HALF*TAUI*CDOTC( I, TAU, 1, A( 1, I+1 ), 1 ) + CALL CAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL CHER2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A, + $ LDA ) +* + ELSE + A( I, I ) = REAL( A( I, I ) ) + END IF + A( I, I+1 ) = E( I ) + D( I+1 ) = A( I+1, I+1 ) + TAU( I ) = TAUI + 10 CONTINUE + D( 1 ) = A( 1, 1 ) + ELSE +* +* Reduce the lower triangle of A +* + A( 1, 1 ) = REAL( A( 1, 1 ) ) + DO 20 I = 1, N - 1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(i+2:n,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + A( I+1, I ) = ONE +* +* Compute x := tau * A * v storing y in TAU(i:n-1) +* + CALL CHEMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, TAU( I ), 1 ) +* +* Compute w := x - 1/2 * tau * (x'*v) * v +* + ALPHA = -HALF*TAUI*CDOTC( N-I, TAU( I ), 1, A( I+1, I ), + $ 1 ) + CALL CAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 ) +* +* Apply the transformation 
as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL CHER2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1, + $ A( I+1, I+1 ), LDA ) +* + ELSE + A( I+1, I+1 ) = REAL( A( I+1, I+1 ) ) + END IF + A( I+1, I ) = E( I ) + D( I ) = A( I, I ) + TAU( I ) = TAUI + 20 CONTINUE + D( N ) = A( N, N ) + END IF +* + RETURN +* +* End of CHETD2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/chetrd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/chetrd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,296 @@ + SUBROUTINE CHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CHETRD reduces a complex Hermitian matrix A to real symmetric +* tridiagonal form T by a unitary similarity transformation: +* Q**H * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. 
+* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the unitary +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the unitary matrix Q as a product +* of elementary reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* D (output) REAL array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) COMPLEX array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). 
+* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +* A(1:i-1,i+1), and tau in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +* and tau in TAU(i). +* +* The contents of A on exit are illustrated by the following examples +* with n = 5: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( d e v2 v3 v4 ) ( d ) +* ( d e v3 v4 ) ( e d ) +* ( d e v4 ) ( v1 e d ) +* ( d e ) ( v1 v2 e d ) +* ( d ) ( v1 v2 v3 e d ) +* +* where d and e denote diagonal and off-diagonal elements of T, and vi +* denotes an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CHER2K, CHETD2, CLATRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. 
+* + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NX = N + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.N ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'CHETRD', UPLO, N, -1, -1, -1 ) ) + IF( NX.LT.N ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code by setting NX = N. +* + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = ILAENV( 2, 'CHETRD', UPLO, N, -1, -1, -1 ) + IF( NB.LT.NBMIN ) + $ NX = N + END IF + ELSE + NX = N + END IF + ELSE + NB = 1 + END IF +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* Columns 1:kk are handled by the unblocked method. 
+* + KK = N - ( ( N-NX+NB-1 ) / NB )*NB + DO 20 I = N - NB + 1, KK + 1, -NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL CLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK, + $ LDWORK ) +* +* Update the unreduced submatrix A(1:i-1,1:i-1), using an +* update of the form: A := A - V*W' - W*V' +* + CALL CHER2K( UPLO, 'No transpose', I-1, NB, -CONE, + $ A( 1, I ), LDA, WORK, LDWORK, ONE, A, LDA ) +* +* Copy superdiagonal elements back into A, and diagonal +* elements into D +* + DO 10 J = I, I + NB - 1 + A( J-1, J ) = E( J-1 ) + D( J ) = A( J, J ) + 10 CONTINUE + 20 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL CHETD2( UPLO, KK, A, LDA, D, E, TAU, IINFO ) + ELSE +* +* Reduce the lower triangle of A +* + DO 40 I = 1, N - NX, NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL CLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ), + $ TAU( I ), WORK, LDWORK ) +* +* Update the unreduced submatrix A(i+nb:n,i+nb:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL CHER2K( UPLO, 'No transpose', N-I-NB+1, NB, -CONE, + $ A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE, + $ A( I+NB, I+NB ), LDA ) +* +* Copy subdiagonal elements back into A, and diagonal +* elements into D +* + DO 30 J = I, I + NB - 1 + A( J+1, J ) = E( J ) + D( J ) = A( J, J ) + 30 CONTINUE + 40 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL CHETD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAU( I ), IINFO ) + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of CHETRD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/chseqr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/chseqr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,395 @@ + SUBROUTINE CHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, W, Z, LDZ, + $ WORK, LWORK, INFO 
) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N + CHARACTER COMPZ, JOB +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* Purpose +* ======= +* +* CHSEQR computes the eigenvalues of a Hessenberg matrix H +* and, optionally, the matrices T and Z from the Schur decomposition +* H = Z T Z**H, where T is an upper triangular matrix (the +* Schur form), and Z is the unitary matrix of Schur vectors. +* +* Optionally Z may be postmultiplied into an input unitary +* matrix Q so that this routine can give the Schur factorization +* of a matrix A which has been reduced to the Hessenberg form H +* by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* = 'E': compute eigenvalues only; +* = 'S': compute eigenvalues and the Schur form T. +* +* COMPZ (input) CHARACTER*1 +* = 'N': no Schur vectors are computed; +* = 'I': Z is initialized to the unit matrix and the matrix Z +* of Schur vectors of H is returned; +* = 'V': Z must contain an unitary matrix Q on entry, and +* the product Q*Z is returned. +* +* N (input) INTEGER +* The order of the matrix H. N .GE. 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to CGEBAL, and then passed to CGEHRD +* when the matrix output by CGEBAL is reduced to Hessenberg +* form. Otherwise ILO and IHI should be set to 1 and N +* respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +* If N = 0, then ILO = 1 and IHI = 0. +* +* H (input/output) COMPLEX array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. 
+* On exit, if INFO = 0 and JOB = 'S', H contains the upper +* triangular matrix T from the Schur decomposition (the +* Schur form). If INFO = 0 and JOB = 'E', the contents of +* H are unspecified on exit. (The output value of H when +* INFO.GT.0 is given under the description of INFO below.) +* +* Unlike earlier versions of CHSEQR, this subroutine may +* explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 +* or j = IHI+1, IHI+2, ... N. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH .GE. max(1,N). +* +* W (output) COMPLEX array, dimension (N) +* The computed eigenvalues. If JOB = 'S', the eigenvalues are +* stored in the same order as on the diagonal of the Schur +* form returned in H, with W(i) = H(i,i). +* +* Z (input/output) COMPLEX array, dimension (LDZ,N) +* If COMPZ = 'N', Z is not referenced. +* If COMPZ = 'I', on entry Z need not be set and on exit, +* if INFO = 0, Z contains the unitary matrix Z of the Schur +* vectors of H. If COMPZ = 'V', on entry Z must contain an +* N-by-N matrix Q, which is assumed to be equal to the unit +* matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, +* if INFO = 0, Z contains Q*Z. +* Normally Q is the unitary matrix generated by CUNGHR +* after the call to CGEHRD which formed the Hessenberg matrix +* H. (The output value of Z when INFO.GT.0 is given under +* the description of INFO below.) +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. if COMPZ = 'I' or +* COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns an estimate of +* the optimal value for LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK .GE. max(1,N) +* is sufficient, but LWORK typically as large as 6*N may +* be required for optimal performance. A workspace query +* to determine the optimal workspace size is recommended. 
+* +* If LWORK = -1, then CHSEQR does a workspace query. +* In this case, CHSEQR checks the input parameters and +* estimates the optimal workspace size for the given +* values of N, ILO and IHI. The estimate is returned +* in WORK(1). No error message related to LWORK is +* issued by XERBLA. Neither H nor Z are accessed. +* +* +* INFO (output) INTEGER +* = 0: successful exit +* .LT. 0: if INFO = -i, the i-th argument had an illegal +* value +* .GT. 0: if INFO = i, CHSEQR failed to compute all of +* the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +* and WI contain those eigenvalues which have been +* successfully computed. (Failures are rare.) +* +* If INFO .GT. 0 and JOB = 'E', then on exit, the +* remaining unconverged eigenvalues are the eigen- +* values of the upper Hessenberg matrix rows and +* columns ILO through INFO of the final, output +* value of H. +* +* If INFO .GT. 0 and JOB = 'S', then on exit +* +* (*) (initial value of H)*U = U*(final value of H) +* +* where U is a unitary matrix. The final +* value of H is upper Hessenberg and triangular in +* rows and columns INFO+1 through IHI. +* +* If INFO .GT. 0 and COMPZ = 'V', then on exit +* +* (final value of Z) = (initial value of Z)*U +* +* where U is the unitary matrix in (*) (regard- +* less of the value of JOB.) +* +* If INFO .GT. 0 and COMPZ = 'I', then on exit +* (final value of Z) = U +* where U is the unitary matrix in (*) (regard- +* less of the value of JOB.) +* +* If INFO .GT. 0 and COMPZ = 'N', then Z is not +* accessed. +* +* ================================================================ +* Default values supplied by +* ILAENV(ISPEC,'CHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). +* It is suggested that these defaults be adjusted in order +* to attain best performance in each particular +* computational environment. +* +* ISPEC=1: The CLAHQR vs CLAQR0 crossover point. +* Default: 75. (Must be at least 11.) +* +* ISPEC=2: Recommended deflation window size. +* This depends on ILO, IHI and NS. 
NS is the +* number of simultaneous shifts returned +* by ILAENV(ISPEC=4). (See ISPEC=4 below.) +* The default for (IHI-ILO+1).LE.500 is NS. +* The default for (IHI-ILO+1).GT.500 is 3*NS/2. +* +* ISPEC=3: Nibble crossover point. (See ILAENV for +* details.) Default: 14% of deflation window +* size. +* +* ISPEC=4: Number of simultaneous shifts, NS, in +* a multi-shift QR iteration. +* +* If IHI-ILO+1 is ... +* +* greater than ...but less ... the +* or equal to ... than default is +* +* 1 30 NS - 2(+) +* 30 60 NS - 4(+) +* 60 150 NS = 10(+) +* 150 590 NS = ** +* 590 3000 NS = 64 +* 3000 6000 NS = 128 +* 6000 infinity NS = 256 +* +* (+) By default some or all matrices of this order +* are passed to the implicit double shift routine +* CLAHQR and NS is ignored. See ISPEC=1 above +* and comments in IPARM for details. +* +* The asterisks (**) indicate an ad-hoc +* function of N increasing from 10 to 64. +* +* ISPEC=5: Select structured matrix multiply. +* (See ILAENV for details.) Default: 3. +* +* ================================================================ +* Based on contributions by +* Karen Braman and Ralph Byers, Department of Mathematics, +* University of Kansas, USA +* +* ================================================================ +* References: +* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +* Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +* Performance, SIAM Journal of Matrix Analysis, volume 23, pages +* 929--947, 2002. +* +* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +* Algorithm Part II: Aggressive Early Deflation, SIAM Journal +* of Matrix Analysis, volume 23, pages 948--973, 2002. +* +* ================================================================ +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . CLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== +* +* ==== NL allocates some local workspace to help small matrices +* . 
through a rare CLAHQR failure. NL .GT. NTINY = 11 is +* . required and NL .LE. NMIN = ILAENV(ISPEC=1,...) is recom- +* . mended. (The default value of NMIN is 75.) Using NL = 49 +* . allows up to six simultaneous shifts and a 16-by-16 +* . deflation window. ==== +* + INTEGER NTINY + PARAMETER ( NTINY = 11 ) + INTEGER NL + PARAMETER ( NL = 49 ) + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), + $ ONE = ( 1.0e0, 0.0e0 ) ) + REAL RZERO + PARAMETER ( RZERO = 0.0e0 ) +* .. +* .. Local Arrays .. + COMPLEX HL( NL, NL ), WORKL( NL ) +* .. +* .. Local Scalars .. + INTEGER KBOT, NMIN + LOGICAL INITZ, LQUERY, WANTT, WANTZ +* .. +* .. External Functions .. + INTEGER ILAENV + LOGICAL LSAME + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CLAHQR, CLAQR0, CLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* ==== Decode and check the input parameters. ==== +* + WANTT = LSAME( JOB, 'S' ) + INITZ = LSAME( COMPZ, 'I' ) + WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) + WORK( 1 ) = CMPLX( REAL( MAX( 1, N ) ), RZERO ) + LQUERY = LWORK.EQ.-1 +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'E' ) .AND. .NOT.WANTT ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN +* +* ==== Quick return in case of invalid argument. ==== +* + CALL XERBLA( 'CHSEQR', -INFO ) + RETURN +* + ELSE IF( N.EQ.0 ) THEN +* +* ==== Quick return in case N = 0; nothing to do. 
==== +* + RETURN +* + ELSE IF( LQUERY ) THEN +* +* ==== Quick return in case of a workspace query ==== +* + CALL CLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILO, IHI, Z, + $ LDZ, WORK, LWORK, INFO ) +* ==== Ensure reported workspace size is backward-compatible with +* . previous LAPACK versions. ==== + WORK( 1 ) = CMPLX( MAX( REAL( WORK( 1 ) ), REAL( MAX( 1, + $ N ) ) ), RZERO ) + RETURN +* + ELSE +* +* ==== copy eigenvalues isolated by CGEBAL ==== +* + IF( ILO.GT.1 ) + $ CALL CCOPY( ILO-1, H, LDH+1, W, 1 ) + IF( IHI.LT.N ) + $ CALL CCOPY( N-IHI, H( IHI+1, IHI+1 ), LDH+1, W( IHI+1 ), 1 ) +* +* ==== Initialize Z, if requested ==== +* + IF( INITZ ) + $ CALL CLASET( 'A', N, N, ZERO, ONE, Z, LDZ ) +* +* ==== Quick return if possible ==== +* + IF( ILO.EQ.IHI ) THEN + W( ILO ) = H( ILO, ILO ) + RETURN + END IF +* +* ==== CLAHQR/CLAQR0 crossover point ==== +* + NMIN = ILAENV( 1, 'CHSEQR', JOB( : 1 ) // COMPZ( : 1 ), N, ILO, + $ IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== CLAQR0 for big matrices; CLAHQR for small ones ==== +* + IF( N.GT.NMIN ) THEN + CALL CLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILO, IHI, + $ Z, LDZ, WORK, LWORK, INFO ) + ELSE +* +* ==== Small matrix ==== +* + CALL CLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILO, IHI, + $ Z, LDZ, INFO ) +* + IF( INFO.GT.0 ) THEN +* +* ==== A rare CLAHQR failure! CLAQR0 sometimes succeeds +* . when CLAHQR fails. ==== +* + KBOT = INFO +* + IF( N.GE.NL ) THEN +* +* ==== Larger matrices have enough subdiagonal scratch +* . space to call CLAQR0 directly. ==== +* + CALL CLAQR0( WANTT, WANTZ, N, ILO, KBOT, H, LDH, W, + $ ILO, IHI, Z, LDZ, WORK, LWORK, INFO ) +* + ELSE +* +* ==== Tiny matrices don't have enough subdiagonal +* . scratch space to benefit from CLAQR0. Hence, +* . tiny matrices must be copied into a larger +* . array before calling CLAQR0. 
==== +* + CALL CLACPY( 'A', N, N, H, LDH, HL, NL ) + HL( N+1, N ) = ZERO + CALL CLASET( 'A', NL, NL-N, ZERO, ZERO, HL( 1, N+1 ), + $ NL ) + CALL CLAQR0( WANTT, WANTZ, NL, ILO, KBOT, HL, NL, W, + $ ILO, IHI, Z, LDZ, WORKL, NL, INFO ) + IF( WANTT .OR. INFO.NE.0 ) + $ CALL CLACPY( 'A', N, N, HL, NL, H, LDH ) + END IF + END IF + END IF +* +* ==== Clear out the trash, if necessary. ==== +* + IF( ( WANTT .OR. INFO.NE.0 ) .AND. N.GT.2 ) + $ CALL CLASET( 'L', N-2, N-2, ZERO, ZERO, H( 3, 1 ), LDH ) +* +* ==== Ensure reported workspace size is backward-compatible with +* . previous LAPACK versions. ==== +* + WORK( 1 ) = CMPLX( MAX( REAL( MAX( 1, N ) ), + $ REAL( WORK( 1 ) ) ), RZERO ) + END IF +* +* ==== End of CHSEQR ==== +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clabrd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clabrd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,328 @@ + SUBROUTINE CLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, + $ LDY ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAUP( * ), TAUQ( * ), X( LDX, * ), + $ Y( LDY, * ) +* .. +* +* Purpose +* ======= +* +* CLABRD reduces the first NB rows and columns of a complex general +* m by n matrix A to upper or lower real bidiagonal form by a unitary +* transformation Q' * A * P, and returns the matrices X and Y which +* are needed to apply the transformation to the unreduced part of A. +* +* If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower +* bidiagonal form. +* +* This is an auxiliary routine called by CGEBRD +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. +* +* N (input) INTEGER +* The number of columns in the matrix A. 
+* +* NB (input) INTEGER +* The number of leading rows and columns of A to be reduced. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the m by n general matrix to be reduced. +* On exit, the first NB rows and columns of the matrix are +* overwritten; the rest of the array is unchanged. +* If m >= n, elements on and below the diagonal in the first NB +* columns, with the array TAUQ, represent the unitary +* matrix Q as a product of elementary reflectors; and +* elements above the diagonal in the first NB rows, with the +* array TAUP, represent the unitary matrix P as a product +* of elementary reflectors. +* If m < n, elements below the diagonal in the first NB +* columns, with the array TAUQ, represent the unitary +* matrix Q as a product of elementary reflectors, and +* elements on and above the diagonal in the first NB rows, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) REAL array, dimension (NB) +* The diagonal elements of the first NB rows and columns of +* the reduced matrix. D(i) = A(i,i). +* +* E (output) REAL array, dimension (NB) +* The off-diagonal elements of the first NB rows and columns of +* the reduced matrix. +* +* TAUQ (output) COMPLEX array dimension (NB) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Q. See Further Details. +* +* TAUP (output) COMPLEX array, dimension (NB) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix P. See Further Details. +* +* X (output) COMPLEX array, dimension (LDX,NB) +* The m-by-nb matrix X required to update the unreduced part +* of A. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,M). +* +* Y (output) COMPLEX array, dimension (LDY,NB) +* The n-by-nb matrix Y required to update the unreduced part +* of A. 
+* +* LDY (input) INTEGER +* The leading dimension of the array Y. LDY >= max(1,N). +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, and v and u are complex +* vectors. +* +* If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in +* A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in +* A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The elements of the vectors v and u together form the m-by-nb matrix +* V and the nb-by-n matrix U' which are needed, with X and Y, to apply +* the transformation to the unreduced part of the matrix, using a block +* update of the form: A := A - V*Y' - X*U'. +* +* The contents of A on exit are illustrated by the following examples +* with nb = 2: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) +* ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) +* ( v1 v2 a a a ) ( v1 1 a a a a ) +* ( v1 v2 a a a ) ( v1 v2 a a a a ) +* ( v1 v2 a a a ) ( v1 v2 a a a a ) +* ( v1 v2 a a a ) +* +* where a denotes an element of the original matrix which is unchanged, +* vi denotes an element of the vector defining H(i), and ui an element +* of the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CLARFG, CSCAL +* .. 
+* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, NB +* +* Update A(i:m,i) +* + CALL CLACGV( I-1, Y( I, 1 ), LDY ) + CALL CGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 ) + CALL CLACGV( I-1, Y( I, 1 ), LDY ) + CALL CGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+1:m,i) +* + ALPHA = A( I, I ) + CALL CLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = ALPHA + IF( I.LT.N ) THEN + A( I, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL CGEMV( 'Conjugate transpose', M-I+1, N-I, ONE, + $ A( I, I+1 ), LDA, A( I, I ), 1, ZERO, + $ Y( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', M-I+1, I-1, ONE, + $ A( I, 1 ), LDA, A( I, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL CGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', M-I+1, I-1, ONE, + $ X( I, 1 ), LDX, A( I, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, Y( 1, I ), 1, ONE, + $ Y( I+1, I ), 1 ) + CALL CSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) +* +* Update A(i,i+1:n) +* + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + CALL CLACGV( I, A( I, 1 ), LDA ) + CALL CGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA ) + CALL CLACGV( I, A( I, 1 ), LDA ) + CALL CLACGV( I-1, X( I, 1 ), LDX ) + CALL CGEMV( 'Conjugate transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, X( I, 1 ), LDX, ONE, + $ A( I, I+1 ), LDA ) + CALL CLACGV( I-1, X( I, 1 ), LDX ) +* +* Generate reflection P(i) to annihilate A(i,i+2:n) +* + ALPHA = A( I, I+1 ) + CALL CLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = ALPHA + A( I, I+1 
) = ONE +* +* Compute X(i+1:m,i) +* + CALL CGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-I, I, ONE, + $ Y( I+1, 1 ), LDY, A( I, I+1 ), LDA, ZERO, + $ X( 1, I ), 1 ) + CALL CGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL CGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL CGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL CSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, NB +* +* Update A(i,i:n) +* + CALL CLACGV( N-I+1, A( I, I ), LDA ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + CALL CGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + CALL CLACGV( I-1, X( I, 1 ), LDX ) + CALL CGEMV( 'Conjugate transpose', I-1, N-I+1, -ONE, + $ A( 1, I ), LDA, X( I, 1 ), LDX, ONE, A( I, I ), + $ LDA ) + CALL CLACGV( I-1, X( I, 1 ), LDX ) +* +* Generate reflection P(i) to annihilate A(i,i+1:n) +* + ALPHA = A( I, I ) + CALL CLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = ALPHA + IF( I.LT.M ) THEN + A( I, I ) = ONE +* +* Compute X(i+1:m,i) +* + CALL CGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-I+1, I-1, ONE, + $ Y( I, 1 ), LDY, A( I, I ), LDA, ZERO, + $ X( 1, I ), 1 ) + CALL CGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL CGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL CGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL CSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) 
+ CALL CLACGV( N-I+1, A( I, I ), LDA ) +* +* Update A(i+1:m,i) +* + CALL CLACGV( I-1, Y( I, 1 ), LDY ) + CALL CGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 ) + CALL CLACGV( I-1, Y( I, 1 ), LDY ) + CALL CGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+2:m,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL CGEMV( 'Conjugate transpose', M-I, N-I, ONE, + $ A( I+1, I+1 ), LDA, A( I+1, I ), 1, ZERO, + $ Y( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', M-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL CGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', M-I, I, ONE, + $ X( I+1, 1 ), LDX, A( I+1, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', I, N-I, -ONE, + $ A( 1, I+1 ), LDA, Y( 1, I ), 1, ONE, + $ Y( I+1, I ), 1 ) + CALL CSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) + ELSE + CALL CLACGV( N-I+1, A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + RETURN +* +* End of CLABRD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clacgv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clacgv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,60 @@ + SUBROUTINE CLACGV( N, X, INCX ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* Purpose +* ======= +* +* CLACGV conjugates a complex vector of length N. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The length of the vector X. N >= 0. 
+* +* X (input/output) COMPLEX array, dimension +* (1+(N-1)*abs(INCX)) +* On entry, the vector of length N to be conjugated. +* On exit, X is overwritten with conjg(X). +* +* INCX (input) INTEGER +* The spacing between successive elements of X. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IOFF +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* + IF( INCX.EQ.1 ) THEN + DO 10 I = 1, N + X( I ) = CONJG( X( I ) ) + 10 CONTINUE + ELSE + IOFF = 1 + IF( INCX.LT.0 ) + $ IOFF = 1 - ( N-1 )*INCX + DO 20 I = 1, N + X( IOFF ) = CONJG( X( IOFF ) ) + IOFF = IOFF + INCX + 20 CONTINUE + END IF + RETURN +* +* End of CLACGV +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clacn2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clacn2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,221 @@ + SUBROUTINE CLACN2( N, V, X, EST, KASE, ISAVE ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER KASE, N + REAL EST +* .. +* .. Array Arguments .. + INTEGER ISAVE( 3 ) + COMPLEX V( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CLACN2 estimates the 1-norm of a square, complex matrix A. +* Reverse communication is used for evaluating matrix-vector products. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 1. +* +* V (workspace) COMPLEX array, dimension (N) +* On the final return, V = A*W, where EST = norm(V)/norm(W) +* (W is not returned). +* +* X (input/output) COMPLEX array, dimension (N) +* On an intermediate return, X should be overwritten by +* A * X, if KASE=1, +* A' * X, if KASE=2, +* where A' is the conjugate transpose of A, and CLACN2 must be +* re-called with all the other parameters unchanged. 
+* +* EST (input/output) REAL +* On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be +* unchanged from the previous call to CLACN2. +* On exit, EST is an estimate (a lower bound) for norm(A). +* +* KASE (input/output) INTEGER +* On the initial call to CLACN2, KASE should be 0. +* On an intermediate return, KASE will be 1 or 2, indicating +* whether X should be overwritten by A * X or A' * X. +* On the final return from CLACN2, KASE will again be 0. +* +* ISAVE (input/output) INTEGER array, dimension (3) +* ISAVE is used to save variables between calls to SLACN2 +* +* Further Details +* ======= ======= +* +* Contributed by Nick Higham, University of Manchester. +* Originally named CONEST, dated March 16, 1988. +* +* Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of +* a real or complex matrix, with applications to condition estimation", +* ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +* +* Last modified: April, 1999 +* +* This is a thread safe version of CLACON, which uses the array ISAVE +* in place of a SAVE statement, as follows: +* +* CLACON CLACN2 +* JUMP ISAVE(1) +* J ISAVE(2) +* ITER ISAVE(3) +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ONE, TWO + PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, JLAST + REAL ABSXI, ALTSGN, ESTOLD, SAFMIN, TEMP +* .. +* .. External Functions .. + INTEGER ICMAX1 + REAL SCSUM1, SLAMCH + EXTERNAL ICMAX1, SCSUM1, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, REAL +* .. +* .. Executable Statements .. 
+* + SAFMIN = SLAMCH( 'Safe minimum' ) + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = CMPLX( ONE / REAL( N ) ) + 10 CONTINUE + KASE = 1 + ISAVE( 1 ) = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 90, 120 )ISAVE( 1 ) +* +* ................ ENTRY (ISAVE( 1 ) = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 130 + END IF + EST = SCSUM1( N, X, 1 ) +* + DO 30 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = CMPLX( REAL( X( I ) ) / ABSXI, + $ AIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 30 CONTINUE + KASE = 2 + ISAVE( 1 ) = 2 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. +* + 40 CONTINUE + ISAVE( 2 ) = ICMAX1( N, X, 1 ) + ISAVE( 3 ) = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = CZERO + 60 CONTINUE + X( ISAVE( 2 ) ) = CONE + KASE = 1 + ISAVE( 1 ) = 3 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL CCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = SCSUM1( N, V, 1 ) +* +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 100 +* + DO 80 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = CMPLX( REAL( X( I ) ) / ABSXI, + $ AIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 80 CONTINUE + KASE = 2 + ISAVE( 1 ) = 4 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 4) +* X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. +* + 90 CONTINUE + JLAST = ISAVE( 2 ) + ISAVE( 2 ) = ICMAX1( N, X, 1 ) + IF( ( ABS( X( JLAST ) ).NE.ABS( X( ISAVE( 2 ) ) ) ) .AND. + $ ( ISAVE( 3 ).LT.ITMAX ) ) THEN + ISAVE( 3 ) = ISAVE( 3 ) + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. 
+* + 100 CONTINUE + ALTSGN = ONE + DO 110 I = 1, N + X( I ) = CMPLX( ALTSGN*( ONE + REAL( I-1 ) / REAL( N-1 ) ) ) + ALTSGN = -ALTSGN + 110 CONTINUE + KASE = 1 + ISAVE( 1 ) = 5 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 120 CONTINUE + TEMP = TWO*( SCSUM1( N, X, 1 ) / REAL( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL CCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 130 CONTINUE + KASE = 0 + RETURN +* +* End of CLACN2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clacon.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clacon.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,212 @@ + SUBROUTINE CLACON( N, V, X, EST, KASE ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER KASE, N + REAL EST +* .. +* .. Array Arguments .. + COMPLEX V( N ), X( N ) +* .. +* +* Purpose +* ======= +* +* CLACON estimates the 1-norm of a square, complex matrix A. +* Reverse communication is used for evaluating matrix-vector products. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 1. +* +* V (workspace) COMPLEX array, dimension (N) +* On the final return, V = A*W, where EST = norm(V)/norm(W) +* (W is not returned). +* +* X (input/output) COMPLEX array, dimension (N) +* On an intermediate return, X should be overwritten by +* A * X, if KASE=1, +* A' * X, if KASE=2, +* where A' is the conjugate transpose of A, and CLACON must be +* re-called with all the other parameters unchanged. +* +* EST (input/output) REAL +* On entry with KASE = 1 or 2 and JUMP = 3, EST should be +* unchanged from the previous call to CLACON. +* On exit, EST is an estimate (a lower bound) for norm(A). +* +* KASE (input/output) INTEGER +* On the initial call to CLACON, KASE should be 0. 
+* On an intermediate return, KASE will be 1 or 2, indicating +* whether X should be overwritten by A * X or A' * X. +* On the final return from CLACON, KASE will again be 0. +* +* Further Details +* ======= ======= +* +* Contributed by Nick Higham, University of Manchester. +* Originally named CONEST, dated March 16, 1988. +* +* Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of +* a real or complex matrix, with applications to condition estimation", +* ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +* +* Last modified: April, 1999 +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ONE, TWO + PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ITER, J, JLAST, JUMP + REAL ABSXI, ALTSGN, ESTOLD, SAFMIN, TEMP +* .. +* .. External Functions .. + INTEGER ICMAX1 + REAL SCSUM1, SLAMCH + EXTERNAL ICMAX1, SCSUM1, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, REAL +* .. +* .. Save statement .. + SAVE +* .. +* .. Executable Statements .. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = CMPLX( ONE / REAL( N ) ) + 10 CONTINUE + KASE = 1 + JUMP = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 90, 120 )JUMP +* +* ................ ENTRY (JUMP = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 130 + END IF + EST = SCSUM1( N, X, 1 ) +* + DO 30 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = CMPLX( REAL( X( I ) ) / ABSXI, + $ AIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 30 CONTINUE + KASE = 2 + JUMP = 2 + RETURN +* +* ................ ENTRY (JUMP = 2) +* FIRST ITERATION. 
X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. +* + 40 CONTINUE + J = ICMAX1( N, X, 1 ) + ITER = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = CZERO + 60 CONTINUE + X( J ) = CONE + KASE = 1 + JUMP = 3 + RETURN +* +* ................ ENTRY (JUMP = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL CCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = SCSUM1( N, V, 1 ) +* +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 100 +* + DO 80 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = CMPLX( REAL( X( I ) ) / ABSXI, + $ AIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 80 CONTINUE + KASE = 2 + JUMP = 4 + RETURN +* +* ................ ENTRY (JUMP = 4) +* X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. +* + 90 CONTINUE + JLAST = J + J = ICMAX1( N, X, 1 ) + IF( ( ABS( X( JLAST ) ).NE.ABS( X( J ) ) ) .AND. + $ ( ITER.LT.ITMAX ) ) THEN + ITER = ITER + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 100 CONTINUE + ALTSGN = ONE + DO 110 I = 1, N + X( I ) = CMPLX( ALTSGN*( ONE+REAL( I-1 ) / REAL( N-1 ) ) ) + ALTSGN = -ALTSGN + 110 CONTINUE + KASE = 1 + JUMP = 5 + RETURN +* +* ................ ENTRY (JUMP = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 120 CONTINUE + TEMP = TWO*( SCSUM1( N, X, 1 ) / REAL( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL CCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 130 CONTINUE + KASE = 0 + RETURN +* +* End of CLACON +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clacpy.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clacpy.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,90 @@ + SUBROUTINE CLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. 
+* +* Purpose +* ======= +* +* CLACPY copies all or part of a two-dimensional matrix A to another +* matrix B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the part of the matrix A to be copied to B. +* = 'U': Upper triangular part +* = 'L': Lower triangular part +* Otherwise: All of the matrix A +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The m by n matrix A. If UPLO = 'U', only the upper trapezium +* is accessed; if UPLO = 'L', only the lower trapezium is +* accessed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (output) COMPLEX array, dimension (LDB,N) +* On exit, B = A in the locations specified by UPLO. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE +* + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + RETURN +* +* End of CLACPY +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cladiv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cladiv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,46 @@ + COMPLEX FUNCTION CLADIV( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. 
+ COMPLEX X, Y +* .. +* +* Purpose +* ======= +* +* CLADIV := X / Y, where X and Y are complex. The computation of X / Y +* will not overflow on an intermediary step unless the results +* overflows. +* +* Arguments +* ========= +* +* X (input) COMPLEX +* Y (input) COMPLEX +* The complex scalars X and Y. +* +* ===================================================================== +* +* .. Local Scalars .. + REAL ZI, ZR +* .. +* .. External Subroutines .. + EXTERNAL SLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, REAL +* .. +* .. Executable Statements .. +* + CALL SLADIV( REAL( X ), AIMAG( X ), REAL( Y ), AIMAG( Y ), ZR, + $ ZI ) + CLADIV = CMPLX( ZR, ZI ) +* + RETURN +* +* End of CLADIV +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clahqr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clahqr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,469 @@ + SUBROUTINE CLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), W( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CLAHQR is an auxiliary routine called by CHSEQR to update the +* eigenvalues and Schur decomposition already computed by CHSEQR, by +* dealing with the Hessenberg submatrix in rows and columns ILO to +* IHI. +* +* Arguments +* ========= +* +* WANTT (input) LOGICAL +* = .TRUE. : the full Schur form T is required; +* = .FALSE.: only eigenvalues are required. +* +* WANTZ (input) LOGICAL +* = .TRUE. : the matrix of Schur vectors Z is required; +* = .FALSE.: Schur vectors are not required. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. 
+* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper triangular in rows and +* columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). +* CLAHQR works primarily with the Hessenberg submatrix in rows +* and columns ILO to IHI, but applies transformations to all of +* H if WANTT is .TRUE.. +* 1 <= ILO <= max(1,IHI); IHI <= N. +* +* H (input/output) COMPLEX array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if INFO is zero and if WANTT is .TRUE., then H +* is upper triangular in rows and columns ILO:IHI. If INFO +* is zero and if WANTT is .FALSE., then the contents of H +* are unspecified on exit. The output state of H in case +* INF is positive is below under the description of INFO. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* W (output) COMPLEX array, dimension (N) +* The computed eigenvalues ILO to IHI are stored in the +* corresponding elements of W. If WANTT is .TRUE., the +* eigenvalues are stored in the same order as on the diagonal +* of the Schur form returned in H, with W(i) = H(i,i). +* +* ILOZ (input) INTEGER +* IHIZ (input) INTEGER +* Specify the rows of Z to which transformations must be +* applied if WANTZ is .TRUE.. +* 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. +* +* Z (input/output) COMPLEX array, dimension (LDZ,N) +* If WANTZ is .TRUE., on entry Z must contain the current +* matrix Z of transformations accumulated by CHSEQR, and on +* exit Z has been updated; transformations are applied only to +* the submatrix Z(ILOZ:IHIZ,ILO:IHI). +* If WANTZ is .FALSE., Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* .GT. 0: if INFO = i, CLAHQR failed to compute all the +* eigenvalues ILO to IHI in a total of 30 iterations +* per eigenvalue; elements i+1:ihi of W contain +* those eigenvalues which have been successfully +* computed. 
+* +* If INFO .GT. 0 and WANTT is .FALSE., then on exit, +* the remaining unconverged eigenvalues are the +* eigenvalues of the upper Hessenberg matrix +* rows and columns ILO thorugh INFO of the final, +* output value of H. +* +* If INFO .GT. 0 and WANTT is .TRUE., then on exit +* (*) (initial value of H)*U = U*(final value of H) +* where U is an orthognal matrix. The final +* value of H is upper Hessenberg and triangular in +* rows and columns INFO+1 through IHI. +* +* If INFO .GT. 0 and WANTZ is .TRUE., then on exit +* (final value of Z) = (initial value of Z)*U +* where U is the orthogonal matrix in (*) +* (regardless of the value of WANTT.) +* +* Further Details +* =============== +* +* 02-96 Based on modifications by +* David Day, Sandia National Laboratory, USA +* +* 12-04 Further modifications by +* Ralph Byers, University of Kansas, USA +* +* This is a modified version of CLAHQR from LAPACK version 3.0. +* It is (1) more robust against overflow and underflow and +* (2) adopts the more conservative Ahues & Tisseur stopping +* criterion (LAWN 122, 1997). +* +* ========================================================= +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 30 ) + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), + $ ONE = ( 1.0e0, 0.0e0 ) ) + REAL RZERO, RONE, HALF + PARAMETER ( RZERO = 0.0e0, RONE = 1.0e0, HALF = 0.5e0 ) + REAL DAT1 + PARAMETER ( DAT1 = 3.0e0 / 4.0e0 ) +* .. +* .. Local Scalars .. + COMPLEX CDUM, H11, H11S, H22, SC, SUM, T, T1, TEMP, U, + $ V2, X, Y + REAL AA, AB, BA, BB, H10, H21, RTEMP, S, SAFMAX, + $ SAFMIN, SMLNUM, SX, T2, TST, ULP + INTEGER I, I1, I2, ITS, J, JHI, JLO, K, L, M, NH, NZ +* .. +* .. Local Arrays .. + COMPLEX V( 2 ) +* .. +* .. External Functions .. + COMPLEX CLADIV + REAL SLAMCH + EXTERNAL CLADIV, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLARFG, CSCAL, SLABAD +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Intrinsic Functions .. 
+ INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, REAL, SQRT +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( ILO.EQ.IHI ) THEN + W( ILO ) = H( ILO, ILO ) + RETURN + END IF +* +* ==== clear out the trash ==== + DO 10 J = ILO, IHI - 3 + H( J+2, J ) = ZERO + H( J+3, J ) = ZERO + 10 CONTINUE + IF( ILO.LE.IHI-2 ) + $ H( IHI, IHI-2 ) = ZERO +* ==== ensure that subdiagonal entries are real ==== + DO 20 I = ILO + 1, IHI + IF( AIMAG( H( I, I-1 ) ).NE.RZERO ) THEN +* ==== The following redundant normalization +* . avoids problems with both gradual and +* . sudden underflow in ABS(H(I,I-1)) ==== + SC = H( I, I-1 ) / CABS1( H( I, I-1 ) ) + SC = CONJG( SC ) / ABS( SC ) + H( I, I-1 ) = ABS( H( I, I-1 ) ) + IF( WANTT ) THEN + JLO = 1 + JHI = N + ELSE + JLO = ILO + JHI = IHI + END IF + CALL CSCAL( JHI-I+1, SC, H( I, I ), LDH ) + CALL CSCAL( MIN( JHI, I+1 )-JLO+1, CONJG( SC ), H( JLO, I ), + $ 1 ) + IF( WANTZ ) + $ CALL CSCAL( IHIZ-ILOZ+1, CONJG( SC ), Z( ILOZ, I ), 1 ) + END IF + 20 CONTINUE +* + NH = IHI - ILO + 1 + NZ = IHIZ - ILOZ + 1 +* +* Set machine-dependent constants for the stopping criterion. +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = RONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( NH ) / ULP ) +* +* I1 and I2 are the indices of the first row and last column of H +* to which transformations must be applied. If eigenvalues only are +* being computed, I1 and I2 are set inside the main loop. +* + IF( WANTT ) THEN + I1 = 1 + I2 = N + END IF +* +* The main loop begins here. I is the loop index and decreases from +* IHI to ILO in steps of 1. Each iteration of the loop works +* with the active submatrix in rows and columns L to I. +* Eigenvalues I+1 to IHI have already converged. Either L = ILO, or +* H(L,L-1) is negligible so that the matrix splits. 
+* + I = IHI + 30 CONTINUE + IF( I.LT.ILO ) + $ GO TO 150 +* +* Perform QR iterations on rows and columns ILO to I until a +* submatrix of order 1 splits off at the bottom because a +* subdiagonal element has become negligible. +* + L = ILO + DO 130 ITS = 0, ITMAX +* +* Look for a single small subdiagonal element. +* + DO 40 K = I, L + 1, -1 + IF( CABS1( H( K, K-1 ) ).LE.SMLNUM ) + $ GO TO 50 + TST = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) ) + IF( TST.EQ.ZERO ) THEN + IF( K-2.GE.ILO ) + $ TST = TST + ABS( REAL( H( K-1, K-2 ) ) ) + IF( K+1.LE.IHI ) + $ TST = TST + ABS( REAL( H( K+1, K ) ) ) + END IF +* ==== The following is a conservative small subdiagonal +* . deflation criterion due to Ahues & Tisseur (LAWN 122, +* . 1997). It has better mathematical foundation and +* . improves accuracy in some examples. ==== + IF( ABS( REAL( H( K, K-1 ) ) ).LE.ULP*TST ) THEN + AB = MAX( CABS1( H( K, K-1 ) ), CABS1( H( K-1, K ) ) ) + BA = MIN( CABS1( H( K, K-1 ) ), CABS1( H( K-1, K ) ) ) + AA = MAX( CABS1( H( K, K ) ), + $ CABS1( H( K-1, K-1 )-H( K, K ) ) ) + BB = MIN( CABS1( H( K, K ) ), + $ CABS1( H( K-1, K-1 )-H( K, K ) ) ) + S = AA + AB + IF( BA*( AB / S ).LE.MAX( SMLNUM, + $ ULP*( BB*( AA / S ) ) ) )GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE + L = K + IF( L.GT.ILO ) THEN +* +* H(L,L-1) is negligible +* + H( L, L-1 ) = ZERO + END IF +* +* Exit from loop if a submatrix of order 1 has split off. +* + IF( L.GE.I ) + $ GO TO 140 +* +* Now the active submatrix is in rows and columns L to I. If +* eigenvalues only are being computed, only the active submatrix +* need be transformed. +* + IF( .NOT.WANTT ) THEN + I1 = L + I2 = I + END IF +* + IF( ITS.EQ.10 .OR. ITS.EQ.20 ) THEN +* +* Exceptional shift. +* + S = DAT1*ABS( REAL( H( I, I-1 ) ) ) + T = S + H( I, I ) + ELSE +* +* Wilkinson's shift. 
+* + T = H( I, I ) + U = SQRT( H( I-1, I ) )*SQRT( H( I, I-1 ) ) + S = CABS1( U ) + IF( S.NE.RZERO ) THEN + X = HALF*( H( I-1, I-1 )-T ) + SX = CABS1( X ) + S = MAX( S, CABS1( X ) ) + Y = S*SQRT( ( X / S )**2+( U / S )**2 ) + IF( SX.GT.RZERO ) THEN + IF( REAL( X / SX )*REAL( Y )+AIMAG( X / SX )* + $ AIMAG( Y ).LT.RZERO )Y = -Y + END IF + T = T - U*CLADIV( U, ( X+Y ) ) + END IF + END IF +* +* Look for two consecutive small subdiagonal elements. +* + DO 60 M = I - 1, L + 1, -1 +* +* Determine the effect of starting the single-shift QR +* iteration at row M, and see if this would make H(M,M-1) +* negligible. +* + H11 = H( M, M ) + H22 = H( M+1, M+1 ) + H11S = H11 - T + H21 = H( M+1, M ) + S = CABS1( H11S ) + ABS( H21 ) + H11S = H11S / S + H21 = H21 / S + V( 1 ) = H11S + V( 2 ) = H21 + H10 = H( M, M-1 ) + IF( ABS( H10 )*ABS( H21 ).LE.ULP* + $ ( CABS1( H11S )*( CABS1( H11 )+CABS1( H22 ) ) ) ) + $ GO TO 70 + 60 CONTINUE + H11 = H( L, L ) + H22 = H( L+1, L+1 ) + H11S = H11 - T + H21 = H( L+1, L ) + S = CABS1( H11S ) + ABS( H21 ) + H11S = H11S / S + H21 = H21 / S + V( 1 ) = H11S + V( 2 ) = H21 + 70 CONTINUE +* +* Single-shift QR step +* + DO 120 K = M, I - 1 +* +* The first iteration of this loop determines a reflection G +* from the vector V and applies it from left and right to H, +* thus creating a nonzero bulge below the subdiagonal. +* +* Each subsequent iteration determines a reflection G to +* restore the Hessenberg form in the (K-1)th column, and thus +* chases the bulge one step toward the bottom of the active +* submatrix. +* +* V(2) is always real before the call to CLARFG, and hence +* after the call T2 ( = T1*V(2) ) is also real. +* + IF( K.GT.M ) + $ CALL CCOPY( 2, H( K, K-1 ), 1, V, 1 ) + CALL CLARFG( 2, V( 1 ), V( 2 ), 1, T1 ) + IF( K.GT.M ) THEN + H( K, K-1 ) = V( 1 ) + H( K+1, K-1 ) = ZERO + END IF + V2 = V( 2 ) + T2 = REAL( T1*V2 ) +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. 
+* + DO 80 J = K, I2 + SUM = CONJG( T1 )*H( K, J ) + T2*H( K+1, J ) + H( K, J ) = H( K, J ) - SUM + H( K+1, J ) = H( K+1, J ) - SUM*V2 + 80 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+2,I). +* + DO 90 J = I1, MIN( K+2, I ) + SUM = T1*H( J, K ) + T2*H( J, K+1 ) + H( J, K ) = H( J, K ) - SUM + H( J, K+1 ) = H( J, K+1 ) - SUM*CONJG( V2 ) + 90 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 100 J = ILOZ, IHIZ + SUM = T1*Z( J, K ) + T2*Z( J, K+1 ) + Z( J, K ) = Z( J, K ) - SUM + Z( J, K+1 ) = Z( J, K+1 ) - SUM*CONJG( V2 ) + 100 CONTINUE + END IF +* + IF( K.EQ.M .AND. M.GT.L ) THEN +* +* If the QR step was started at row M > L because two +* consecutive small subdiagonals were found, then extra +* scaling must be performed to ensure that H(M,M-1) remains +* real. +* + TEMP = ONE - T1 + TEMP = TEMP / ABS( TEMP ) + H( M+1, M ) = H( M+1, M )*CONJG( TEMP ) + IF( M+2.LE.I ) + $ H( M+2, M+1 ) = H( M+2, M+1 )*TEMP + DO 110 J = M, I + IF( J.NE.M+1 ) THEN + IF( I2.GT.J ) + $ CALL CSCAL( I2-J, TEMP, H( J, J+1 ), LDH ) + CALL CSCAL( J-I1, CONJG( TEMP ), H( I1, J ), 1 ) + IF( WANTZ ) THEN + CALL CSCAL( NZ, CONJG( TEMP ), Z( ILOZ, J ), 1 ) + END IF + END IF + 110 CONTINUE + END IF + 120 CONTINUE +* +* Ensure that H(I,I-1) is real. +* + TEMP = H( I, I-1 ) + IF( AIMAG( TEMP ).NE.RZERO ) THEN + RTEMP = ABS( TEMP ) + H( I, I-1 ) = RTEMP + TEMP = TEMP / RTEMP + IF( I2.GT.I ) + $ CALL CSCAL( I2-I, CONJG( TEMP ), H( I, I+1 ), LDH ) + CALL CSCAL( I-I1, TEMP, H( I1, I ), 1 ) + IF( WANTZ ) THEN + CALL CSCAL( NZ, TEMP, Z( ILOZ, I ), 1 ) + END IF + END IF +* + 130 CONTINUE +* +* Failure to converge in remaining number of iterations +* + INFO = I + RETURN +* + 140 CONTINUE +* +* H(I,I-1) is negligible: one eigenvalue has converged. +* + W( I ) = H( I, I ) +* +* return to start of the main loop with new value of I. 
+* + I = L - 1 + GO TO 30 +* + 150 CONTINUE + RETURN +* +* End of CLAHQR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clahr2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clahr2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,240 @@ + SUBROUTINE CLAHR2( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* Purpose +* ======= +* +* CLAHR2 reduces the first NB columns of A complex general n-BY-(n-k+1) +* matrix A so that elements below the k-th subdiagonal are zero. The +* reduction is performed by an unitary similarity transformation +* Q' * A * Q. The routine returns the matrices V and T which determine +* Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. +* +* This is an auxiliary routine called by CGEHRD. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* K (input) INTEGER +* The offset for the reduction. Elements below the k-th +* subdiagonal in the first NB columns are reduced to zero. +* K < N. +* +* NB (input) INTEGER +* The number of columns to be reduced. +* +* A (input/output) COMPLEX array, dimension (LDA,N-K+1) +* On entry, the n-by-(n-k+1) general matrix A. +* On exit, the elements on and above the k-th subdiagonal in +* the first NB columns are overwritten with the corresponding +* elements of the reduced matrix; the elements below the k-th +* subdiagonal, with the array TAU, represent the matrix Q as a +* product of elementary reflectors. The other columns of A are +* unchanged. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). 
+* +* TAU (output) COMPLEX array, dimension (NB) +* The scalar factors of the elementary reflectors. See Further +* Details. +* +* T (output) COMPLEX array, dimension (LDT,NB) +* The upper triangular matrix T. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= NB. +* +* Y (output) COMPLEX array, dimension (LDY,NB) +* The n-by-nb matrix Y. +* +* LDY (input) INTEGER +* The leading dimension of the array Y. LDY >= N. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of nb elementary reflectors +* +* Q = H(1) H(2) . . . H(nb). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +* A(i+k+1:n,i), and tau in TAU(i). +* +* The elements of the vectors v together form the (n-k+1)-by-nb matrix +* V which is needed, with T and Y, to apply the transformation to the +* unreduced part of the matrix, using an update of the form: +* A := (I - V*T*V') * (A - Y*V'). +* +* The contents of A on exit are illustrated by the following example +* with n = 7, k = 3 and nb = 2: +* +* ( a a a a a ) +* ( a a a a a ) +* ( a a a a a ) +* ( h h a a a ) +* ( v1 h a a a ) +* ( v1 v2 a a a ) +* ( v1 v2 a a a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* This file is a slight modification of LAPACK-3.0's CLAHRD +* incorporating improvements proposed by Quintana-Orti and Van de +* Gejin. Note that the entries of A(1:K,2:NB) differ from those +* returned by the original LAPACK routine. This function is +* not backward compatible with LAPACK3.0. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. 
+ INTEGER I + COMPLEX EI +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMM, CGEMV, CLACPY, + $ CLARFG, CSCAL, CTRMM, CTRMV, CLACGV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(K+1:N,I) +* +* Update I-th column of A - Y * V' +* + CALL CLACGV( I-1, A( K+I-1, 1 ), LDA ) + CALL CGEMV( 'NO TRANSPOSE', N-K, I-1, -ONE, Y(K+1,1), LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( K+1, I ), 1 ) + CALL CLACGV( I-1, A( K+I-1, 1 ), LDA ) +* +* Apply I - V * T' * V' to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1' * b1 +* + CALL CCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL CTRMV( 'Lower', 'Conjugate transpose', 'UNIT', + $ I-1, A( K+1, 1 ), + $ LDA, T( 1, NB ), 1 ) +* +* w := w + V2'*b2 +* + CALL CGEMV( 'Conjugate transpose', N-K-I+1, I-1, + $ ONE, A( K+I, 1 ), + $ LDA, A( K+I, I ), 1, ONE, T( 1, NB ), 1 ) +* +* w := T'*w +* + CALL CTRMV( 'Upper', 'Conjugate transpose', 'NON-UNIT', + $ I-1, T, LDT, + $ T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL CGEMV( 'NO TRANSPOSE', N-K-I+1, I-1, -ONE, + $ A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL CTRMV( 'Lower', 'NO TRANSPOSE', + $ 'UNIT', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL CAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(I) to annihilate +* A(K+I+1:N,I) +* + CALL CLARFG( N-K-I+1, A( K+I, I ), A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + EI = A( K+I, I ) + A( K+I, I ) = ONE +* +* Compute Y(K+1:N,I) +* + CALL CGEMV( 'NO TRANSPOSE', N-K, N-K-I+1, + $ ONE, A( K+1, I+1 ), + $ LDA, A( K+I, I ), 1, ZERO, Y( K+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-K-I+1, I-1, + $ ONE, 
A( K+I, 1 ), LDA, + $ A( K+I, I ), 1, ZERO, T( 1, I ), 1 ) + CALL CGEMV( 'NO TRANSPOSE', N-K, I-1, -ONE, + $ Y( K+1, 1 ), LDY, + $ T( 1, I ), 1, ONE, Y( K+1, I ), 1 ) + CALL CSCAL( N-K, TAU( I ), Y( K+1, I ), 1 ) +* +* Compute T(1:I,I) +* + CALL CSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL CTRMV( 'Upper', 'No Transpose', 'NON-UNIT', + $ I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* +* Compute Y(1:K,1:NB) +* + CALL CLACPY( 'ALL', K, NB, A( 1, 2 ), LDA, Y, LDY ) + CALL CTRMM( 'RIGHT', 'Lower', 'NO TRANSPOSE', + $ 'UNIT', K, NB, + $ ONE, A( K+1, 1 ), LDA, Y, LDY ) + IF( N.GT.K+NB ) + $ CALL CGEMM( 'NO TRANSPOSE', 'NO TRANSPOSE', K, + $ NB, N-K-NB, ONE, + $ A( 1, 2+NB ), LDA, A( K+1+NB, 1 ), LDA, ONE, Y, + $ LDY ) + CALL CTRMM( 'RIGHT', 'Upper', 'NO TRANSPOSE', + $ 'NON-UNIT', K, NB, + $ ONE, T, LDT, Y, LDY ) +* + RETURN +* +* End of CLAHR2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clahrd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clahrd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,213 @@ + SUBROUTINE CLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* Purpose +* ======= +* +* CLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) +* matrix A so that elements below the k-th subdiagonal are zero. The +* reduction is performed by a unitary similarity transformation +* Q' * A * Q. The routine returns the matrices V and T which determine +* Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. +* +* This is an OBSOLETE auxiliary routine. +* This routine will be 'deprecated' in a future release. +* Please use the new routine CLAHR2 instead. 
+* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* K (input) INTEGER +* The offset for the reduction. Elements below the k-th +* subdiagonal in the first NB columns are reduced to zero. +* +* NB (input) INTEGER +* The number of columns to be reduced. +* +* A (input/output) COMPLEX array, dimension (LDA,N-K+1) +* On entry, the n-by-(n-k+1) general matrix A. +* On exit, the elements on and above the k-th subdiagonal in +* the first NB columns are overwritten with the corresponding +* elements of the reduced matrix; the elements below the k-th +* subdiagonal, with the array TAU, represent the matrix Q as a +* product of elementary reflectors. The other columns of A are +* unchanged. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) COMPLEX array, dimension (NB) +* The scalar factors of the elementary reflectors. See Further +* Details. +* +* T (output) COMPLEX array, dimension (LDT,NB) +* The upper triangular matrix T. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= NB. +* +* Y (output) COMPLEX array, dimension (LDY,NB) +* The n-by-nb matrix Y. +* +* LDY (input) INTEGER +* The leading dimension of the array Y. LDY >= max(1,N). +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of nb elementary reflectors +* +* Q = H(1) H(2) . . . H(nb). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +* A(i+k+1:n,i), and tau in TAU(i). +* +* The elements of the vectors v together form the (n-k+1)-by-nb matrix +* V which is needed, with T and Y, to apply the transformation to the +* unreduced part of the matrix, using an update of the form: +* A := (I - V*T*V') * (A - Y*V'). 
+* +* The contents of A on exit are illustrated by the following example +* with n = 7, k = 3 and nb = 2: +* +* ( a h a a a ) +* ( a h a a a ) +* ( a h a a a ) +* ( h h a a a ) +* ( v1 h a a a ) +* ( v1 v2 a a a ) +* ( v1 v2 a a a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX EI +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMV, CLACGV, CLARFG, CSCAL, + $ CTRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(1:n,i) +* +* Compute i-th column of A - Y * V' +* + CALL CLACGV( I-1, A( K+I-1, 1 ), LDA ) + CALL CGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( 1, I ), 1 ) + CALL CLACGV( I-1, A( K+I-1, 1 ), LDA ) +* +* Apply I - V * T' * V' to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1' * b1 +* + CALL CCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL CTRMV( 'Lower', 'Conjugate transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) +* +* w := w + V2'*b2 +* + CALL CGEMV( 'Conjugate transpose', N-K-I+1, I-1, ONE, + $ A( K+I, 1 ), LDA, A( K+I, I ), 1, ONE, + $ T( 1, NB ), 1 ) +* +* w := T'*w +* + CALL CTRMV( 'Upper', 'Conjugate transpose', 'Non-unit', I-1, + $ T, LDT, T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL CGEMV( 'No transpose', N-K-I+1, I-1, -ONE, A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w 
+* + CALL CTRMV( 'Lower', 'No transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL CAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(i) to annihilate +* A(k+i+1:n,i) +* + EI = A( K+I, I ) + CALL CLARFG( N-K-I+1, EI, A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + A( K+I, I ) = ONE +* +* Compute Y(1:n,i) +* + CALL CGEMV( 'No transpose', N, N-K-I+1, ONE, A( 1, I+1 ), LDA, + $ A( K+I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-K-I+1, I-1, ONE, + $ A( K+I, 1 ), LDA, A( K+I, I ), 1, ZERO, T( 1, I ), + $ 1 ) + CALL CGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, T( 1, I ), 1, + $ ONE, Y( 1, I ), 1 ) + CALL CSCAL( N, TAU( I ), Y( 1, I ), 1 ) +* +* Compute T(1:i,i) +* + CALL CSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* + RETURN +* +* End of CLAHRD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/claic1.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/claic1.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,295 @@ + SUBROUTINE CLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER J, JOB + REAL SEST, SESTPR + COMPLEX C, GAMMA, S +* .. +* .. Array Arguments .. + COMPLEX W( J ), X( J ) +* .. 
+* +* Purpose +* ======= +* +* CLAIC1 applies one step of incremental condition estimation in +* its simplest version: +* +* Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j +* lower triangular matrix L, such that +* twonorm(L*x) = sest +* Then CLAIC1 computes sestpr, s, c such that +* the vector +* [ s*x ] +* xhat = [ c ] +* is an approximate singular vector of +* [ L 0 ] +* Lhat = [ w' gamma ] +* in the sense that +* twonorm(Lhat*xhat) = sestpr. +* +* Depending on JOB, an estimate for the largest or smallest singular +* value is computed. +* +* Note that [s c]' and sestpr**2 is an eigenpair of the system +* +* diag(sest*sest, 0) + [alpha gamma] * [ conjg(alpha) ] +* [ conjg(gamma) ] +* +* where alpha = conjg(x)'*w. +* +* Arguments +* ========= +* +* JOB (input) INTEGER +* = 1: an estimate for the largest singular value is computed. +* = 2: an estimate for the smallest singular value is computed. +* +* J (input) INTEGER +* Length of X and W +* +* X (input) COMPLEX array, dimension (J) +* The j-vector x. +* +* SEST (input) REAL +* Estimated singular value of j by j matrix L +* +* W (input) COMPLEX array, dimension (J) +* The j-vector w. +* +* GAMMA (input) COMPLEX +* The diagonal element gamma. +* +* SESTPR (output) REAL +* Estimated singular value of (j+1) by (j+1) matrix Lhat. +* +* S (output) COMPLEX +* Sine needed in forming xhat. +* +* C (output) COMPLEX +* Cosine needed in forming xhat. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) + REAL HALF, FOUR + PARAMETER ( HALF = 0.5E0, FOUR = 4.0E0 ) +* .. +* .. Local Scalars .. + REAL ABSALP, ABSEST, ABSGAM, B, EPS, NORMA, S1, S2, + $ SCL, T, TEST, TMP, ZETA1, ZETA2 + COMPLEX ALPHA, COSINE, SINE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, SQRT +* .. +* .. External Functions .. + REAL SLAMCH + COMPLEX CDOTC + EXTERNAL SLAMCH, CDOTC +* .. +* .. 
Executable Statements .. +* + EPS = SLAMCH( 'Epsilon' ) + ALPHA = CDOTC( J, X, 1, W, 1 ) +* + ABSALP = ABS( ALPHA ) + ABSGAM = ABS( GAMMA ) + ABSEST = ABS( SEST ) +* + IF( JOB.EQ.1 ) THEN +* +* Estimating largest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + S1 = MAX( ABSGAM, ABSALP ) + IF( S1.EQ.ZERO ) THEN + S = ZERO + C = ONE + SESTPR = ZERO + ELSE + S = ALPHA / S1 + C = GAMMA / S1 + TMP = SQRT( S*CONJG( S )+C*CONJG( C ) ) + S = S / TMP + C = C / TMP + SESTPR = S1*TMP + END IF + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ONE + C = ZERO + TMP = MAX( ABSEST, ABSALP ) + S1 = ABSEST / TMP + S2 = ABSALP / TMP + SESTPR = TMP*SQRT( S1*S1+S2*S2 ) + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ONE + C = ZERO + SESTPR = S2 + ELSE + S = ZERO + C = ONE + SESTPR = S1 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = S2*SCL + S = ( ALPHA / S2 ) / SCL + C = ( GAMMA / S2 ) / SCL + ELSE + TMP = S2 / S1 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = S1*SCL + S = ( ALPHA / S1 ) / SCL + C = ( GAMMA / S1 ) / SCL + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ABSALP / ABSEST + ZETA2 = ABSGAM / ABSEST +* + B = ( ONE-ZETA1*ZETA1-ZETA2*ZETA2 )*HALF + C = ZETA1*ZETA1 + IF( B.GT.ZERO ) THEN + T = C / ( B+SQRT( B*B+C ) ) + ELSE + T = SQRT( B*B+C ) - B + END IF +* + SINE = -( ALPHA / ABSEST ) / T + COSINE = -( GAMMA / ABSEST ) / ( ONE+T ) + TMP = SQRT( SINE*CONJG( SINE )+COSINE*CONJG( COSINE ) ) + S = SINE / TMP + C = COSINE / TMP + SESTPR = SQRT( T+ONE )*ABSEST + RETURN + END IF +* + ELSE IF( JOB.EQ.2 ) THEN +* +* Estimating smallest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + SESTPR = ZERO + IF( MAX( ABSGAM, ABSALP ).EQ.ZERO ) THEN + SINE = ONE + COSINE = ZERO + ELSE + SINE = -CONJG( GAMMA ) + COSINE = CONJG( ALPHA ) + END IF + S1 = MAX( ABS( 
SINE ), ABS( COSINE ) ) + S = SINE / S1 + C = COSINE / S1 + TMP = SQRT( S*CONJG( S )+C*CONJG( C ) ) + S = S / TMP + C = C / TMP + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ZERO + C = ONE + SESTPR = ABSGAM + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ZERO + C = ONE + SESTPR = S1 + ELSE + S = ONE + C = ZERO + SESTPR = S2 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST*( TMP / SCL ) + S = -( CONJG( GAMMA ) / S2 ) / SCL + C = ( CONJG( ALPHA ) / S2 ) / SCL + ELSE + TMP = S2 / S1 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST / SCL + S = -( CONJG( GAMMA ) / S1 ) / SCL + C = ( CONJG( ALPHA ) / S1 ) / SCL + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ABSALP / ABSEST + ZETA2 = ABSGAM / ABSEST +* + NORMA = MAX( ONE+ZETA1*ZETA1+ZETA1*ZETA2, + $ ZETA1*ZETA2+ZETA2*ZETA2 ) +* +* See if root is closer to zero or to ONE +* + TEST = ONE + TWO*( ZETA1-ZETA2 )*( ZETA1+ZETA2 ) + IF( TEST.GE.ZERO ) THEN +* +* root is close to zero, compute directly +* + B = ( ZETA1*ZETA1+ZETA2*ZETA2+ONE )*HALF + C = ZETA2*ZETA2 + T = C / ( B+SQRT( ABS( B*B-C ) ) ) + SINE = ( ALPHA / ABSEST ) / ( ONE-T ) + COSINE = -( GAMMA / ABSEST ) / T + SESTPR = SQRT( T+FOUR*EPS*EPS*NORMA )*ABSEST + ELSE +* +* root is closer to ONE, shift by that amount +* + B = ( ZETA2*ZETA2+ZETA1*ZETA1-ONE )*HALF + C = ZETA1*ZETA1 + IF( B.GE.ZERO ) THEN + T = -C / ( B+SQRT( B*B+C ) ) + ELSE + T = B - SQRT( B*B+C ) + END IF + SINE = -( ALPHA / ABSEST ) / T + COSINE = -( GAMMA / ABSEST ) / ( ONE+T ) + SESTPR = SQRT( ONE+T+FOUR*EPS*EPS*NORMA )*ABSEST + END IF + TMP = SQRT( SINE*CONJG( SINE )+COSINE*CONJG( COSINE ) ) + S = SINE / TMP + C = COSINE / TMP + RETURN +* + END IF + END IF + RETURN +* +* End of CLAIC1 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clals0.f --- /dev/null Thu Jan 01 00:00:00 
1970 +0000 +++ b/libcruft/lapack/clals0.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,433 @@ + SUBROUTINE CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ POLES, DIFL, DIFR, Z, K, C, S, RWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, + $ LDGNUM, NL, NR, NRHS, SQRE + REAL C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), PERM( * ) + REAL DIFL( * ), DIFR( LDGNUM, * ), + $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), + $ RWORK( * ), Z( * ) + COMPLEX B( LDB, * ), BX( LDBX, * ) +* .. +* +* Purpose +* ======= +* +* CLALS0 applies back the multiplying factors of either the left or the +* right singular vector matrix of a diagonal matrix appended by a row +* to the right hand side matrix B in solving the least squares problem +* using the divide-and-conquer SVD approach. +* +* For the left singular vector matrix, three types of orthogonal +* matrices are involved: +* +* (1L) Givens rotations: the number of such rotations is GIVPTR; the +* pairs of columns/rows they were applied to are stored in GIVCOL; +* and the C- and S-values of these rotations are stored in GIVNUM. +* +* (2L) Permutation. The (NL+1)-st row of B is to be moved to the first +* row, and for J=2:N, PERM(J)-th row of B is to be moved to the +* J-th row. +* +* (3L) The left singular vector matrix of the remaining matrix. +* +* For the right singular vector matrix, four types of orthogonal +* matrices are involved: +* +* (1R) The right singular vector matrix of the remaining matrix. +* +* (2R) If SQRE = 1, one extra Givens rotation to generate the right +* null space. +* +* (3R) The inverse transformation of (2L). +* +* (4R) The inverse transformation of (1L). 
+* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed in +* factored form: +* = 0: Left singular vector matrix. +* = 1: Right singular vector matrix. +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has row dimension N = NL + NR + 1, +* and column dimension M = N + SQRE. +* +* NRHS (input) INTEGER +* The number of columns of B and BX. NRHS must be at least 1. +* +* B (input/output) COMPLEX array, dimension ( LDB, NRHS ) +* On input, B contains the right hand sides of the least +* squares problem in rows 1 through M. On output, B contains +* the solution X in rows 1 through N. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB must be at least +* max(1,MAX( M, N ) ). +* +* BX (workspace) COMPLEX array, dimension ( LDBX, NRHS ) +* +* LDBX (input) INTEGER +* The leading dimension of BX. +* +* PERM (input) INTEGER array, dimension ( N ) +* The permutations (from deflation and sorting) applied +* to the two blocks. +* +* GIVPTR (input) INTEGER +* The number of Givens rotations which took place in this +* subproblem. +* +* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) +* Each pair of numbers indicates a pair of rows/columns +* involved in a Givens rotation. +* +* LDGCOL (input) INTEGER +* The leading dimension of GIVCOL, must be at least N. +* +* GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) +* Each number indicates the C or S value used in the +* corresponding Givens rotation. +* +* LDGNUM (input) INTEGER +* The leading dimension of arrays DIFR, POLES and +* GIVNUM, must be at least K. 
+* +* POLES (input) REAL array, dimension ( LDGNUM, 2 ) +* On entry, POLES(1:K, 1) contains the new singular +* values obtained from solving the secular equation, and +* POLES(1:K, 2) is an array containing the poles in the secular +* equation. +* +* DIFL (input) REAL array, dimension ( K ). +* On entry, DIFL(I) is the distance between I-th updated +* (undeflated) singular value and the I-th (undeflated) old +* singular value. +* +* DIFR (input) REAL array, dimension ( LDGNUM, 2 ). +* On entry, DIFR(I, 1) contains the distances between I-th +* updated (undeflated) singular value and the I+1-th +* (undeflated) old singular value. And DIFR(I, 2) is the +* normalizing factor for the I-th right singular vector. +* +* Z (input) REAL array, dimension ( K ) +* Contain the components of the deflation-adjusted updating row +* vector. +* +* K (input) INTEGER +* Contains the dimension of the non-deflated matrix, +* This is the order of the related secular equation. 1 <= K <=N. +* +* C (input) REAL +* C contains garbage if SQRE =0 and the C-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* S (input) REAL +* S contains garbage if SQRE =0 and the S-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* RWORK (workspace) REAL array, dimension +* ( K*(1+NRHS) + 2*NRHS ) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0, NEGONE = -1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JCOL, JROW, M, N, NLP1 + REAL DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP +* .. +* .. External Subroutines .. 
+ EXTERNAL CCOPY, CLACPY, CLASCL, CSROT, CSSCAL, SGEMV, + $ XERBLA +* .. +* .. External Functions .. + REAL SLAMC3, SNRM2 + EXTERNAL SLAMC3, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + END IF +* + N = NL + NR + 1 +* + IF( NRHS.LT.1 ) THEN + INFO = -5 + ELSE IF( LDB.LT.N ) THEN + INFO = -7 + ELSE IF( LDBX.LT.N ) THEN + INFO = -9 + ELSE IF( GIVPTR.LT.0 ) THEN + INFO = -11 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -13 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -15 + ELSE IF( K.LT.1 ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLALS0', -INFO ) + RETURN + END IF +* + M = N + SQRE + NLP1 = NL + 1 +* + IF( ICOMPQ.EQ.0 ) THEN +* +* Apply back orthogonal transformations from the left. +* +* Step (1L): apply back the Givens rotations performed. +* + DO 10 I = 1, GIVPTR + CALL CSROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ GIVNUM( I, 1 ) ) + 10 CONTINUE +* +* Step (2L): permute rows of B. +* + CALL CCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX ) + DO 20 I = 2, N + CALL CCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX ) + 20 CONTINUE +* +* Step (3L): apply the inverse of the left singular vector +* matrix to BX. +* + IF( K.EQ.1 ) THEN + CALL CCOPY( NRHS, BX, LDBX, B, LDB ) + IF( Z( 1 ).LT.ZERO ) THEN + CALL CSSCAL( NRHS, NEGONE, B, LDB ) + END IF + ELSE + DO 100 J = 1, K + DIFLJ = DIFL( J ) + DJ = POLES( J, 1 ) + DSIGJ = -POLES( J, 2 ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -POLES( J+1, 2 ) + END IF + IF( ( Z( J ).EQ.ZERO ) .OR. 
( POLES( J, 2 ).EQ.ZERO ) ) + $ THEN + RWORK( J ) = ZERO + ELSE + RWORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ / + $ ( POLES( J, 2 )+DJ ) + END IF + DO 30 I = 1, J - 1 + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( SLAMC3( POLES( I, 2 ), DSIGJ )- + $ DIFLJ ) / ( POLES( I, 2 )+DJ ) + END IF + 30 CONTINUE + DO 40 I = J + 1, K + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( SLAMC3( POLES( I, 2 ), DSIGJP )+ + $ DIFRJ ) / ( POLES( I, 2 )+DJ ) + END IF + 40 CONTINUE + RWORK( 1 ) = NEGONE + TEMP = SNRM2( K, RWORK, 1 ) +* +* Since B and BX are complex, the following call to SGEMV +* is performed in two steps (real and imaginary parts). +* +* CALL SGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, +* $ B( J, 1 ), LDB ) +* + I = K + NRHS*2 + DO 60 JCOL = 1, NRHS + DO 50 JROW = 1, K + I = I + 1 + RWORK( I ) = REAL( BX( JROW, JCOL ) ) + 50 CONTINUE + 60 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K ), 1 ) + I = K + NRHS*2 + DO 80 JCOL = 1, NRHS + DO 70 JROW = 1, K + I = I + 1 + RWORK( I ) = AIMAG( BX( JROW, JCOL ) ) + 70 CONTINUE + 80 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K+NRHS ), 1 ) + DO 90 JCOL = 1, NRHS + B( J, JCOL ) = CMPLX( RWORK( JCOL+K ), + $ RWORK( JCOL+K+NRHS ) ) + 90 CONTINUE + CALL CLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ), + $ LDB, INFO ) + 100 CONTINUE + END IF +* +* Move the deflated rows of BX to B also. +* + IF( K.LT.MAX( M, N ) ) + $ CALL CLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX, + $ B( K+1, 1 ), LDB ) + ELSE +* +* Apply back the right orthogonal transformations. +* +* Step (1R): apply back the new right singular vector matrix +* to B. 
+* + IF( K.EQ.1 ) THEN + CALL CCOPY( NRHS, B, LDB, BX, LDBX ) + ELSE + DO 180 J = 1, K + DSIGJ = POLES( J, 2 ) + IF( Z( J ).EQ.ZERO ) THEN + RWORK( J ) = ZERO + ELSE + RWORK( J ) = -Z( J ) / DIFL( J ) / + $ ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 ) + END IF + DO 110 I = 1, J - 1 + IF( Z( J ).EQ.ZERO ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I+1, + $ 2 ) )-DIFR( I, 1 ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 110 CONTINUE + DO 120 I = J + 1, K + IF( Z( J ).EQ.ZERO ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I, + $ 2 ) )-DIFL( I ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 120 CONTINUE +* +* Since B and BX are complex, the following call to SGEMV +* is performed in two steps (real and imaginary parts). +* +* CALL SGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, +* $ BX( J, 1 ), LDBX ) +* + I = K + NRHS*2 + DO 140 JCOL = 1, NRHS + DO 130 JROW = 1, K + I = I + 1 + RWORK( I ) = REAL( B( JROW, JCOL ) ) + 130 CONTINUE + 140 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K ), 1 ) + I = K + NRHS*2 + DO 160 JCOL = 1, NRHS + DO 150 JROW = 1, K + I = I + 1 + RWORK( I ) = AIMAG( B( JROW, JCOL ) ) + 150 CONTINUE + 160 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K+NRHS ), 1 ) + DO 170 JCOL = 1, NRHS + BX( J, JCOL ) = CMPLX( RWORK( JCOL+K ), + $ RWORK( JCOL+K+NRHS ) ) + 170 CONTINUE + 180 CONTINUE + END IF +* +* Step (2R): if SQRE = 1, apply back the rotation that is +* related to the right null space of the subproblem. +* + IF( SQRE.EQ.1 ) THEN + CALL CCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX ) + CALL CSROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S ) + END IF + IF( K.LT.MAX( M, N ) ) + $ CALL CLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, + $ BX( K+1, 1 ), LDBX ) +* +* Step (3R): permute rows of B. 
+*
+ CALL CCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB )
+ IF( SQRE.EQ.1 ) THEN
+ CALL CCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB )
+ END IF
+ DO 190 I = 2, N
+ CALL CCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB )
+ 190 CONTINUE
+*
+* Step (4R): apply back the Givens rotations performed.
+*
+ DO 200 I = GIVPTR, 1, -1
+ CALL CSROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB,
+ $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ),
+ $ -GIVNUM( I, 1 ) )
+ 200 CONTINUE
+ END IF
+*
+ RETURN
+*
+* End of CLALS0
+*
+ END
diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clalsa.f
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/libcruft/lapack/clalsa.f Sun Apr 27 22:34:17 2008 +0200
@@ -0,0 +1,503 @@
+ SUBROUTINE CLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
+ $ LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
+ $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK,
+ $ IWORK, INFO )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* November 2006
+*
+* .. Scalar Arguments ..
+ INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
+ $ SMLSIZ
+* ..
+* .. Array Arguments ..
+ INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+ $ K( * ), PERM( LDGCOL, * )
+ REAL C( * ), DIFL( LDU, * ), DIFR( LDU, * ),
+ $ GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ),
+ $ S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * )
+ COMPLEX B( LDB, * ), BX( LDBX, * )
+* ..
+*
+* Purpose
+* =======
+*
+* CLALSA is an intermediate step in solving the least squares problem
+* by computing the SVD of the coefficient matrix in compact form (The
+* singular vectors are computed as products of simple orthogonal
+* matrices.).
+*
+* If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector
+* matrix of an upper bidiagonal matrix to the right hand side; and if
+* ICOMPQ = 1, CLALSA applies the right singular vector matrix to the
+* right hand side. The singular vector matrices were generated in
+* compact form by CLALSA.
+* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether the left or the right singular vector +* matrix is involved. +* = 0: Left singular vector matrix +* = 1: Right singular vector matrix +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The row and column dimensions of the upper bidiagonal matrix. +* +* NRHS (input) INTEGER +* The number of columns of B and BX. NRHS must be at least 1. +* +* B (input/output) COMPLEX array, dimension ( LDB, NRHS ) +* On input, B contains the right hand sides of the least +* squares problem in rows 1 through M. +* On output, B contains the solution X in rows 1 through N. +* +* LDB (input) INTEGER +* The leading dimension of B in the calling subprogram. +* LDB must be at least max(1,MAX( M, N ) ). +* +* BX (output) COMPLEX array, dimension ( LDBX, NRHS ) +* On exit, the result of applying the left or right singular +* vector matrix to B. +* +* LDBX (input) INTEGER +* The leading dimension of BX. +* +* U (input) REAL array, dimension ( LDU, SMLSIZ ). +* On entry, U contains the left singular vector matrices of all +* subproblems at the bottom level. +* +* LDU (input) INTEGER, LDU = > N. +* The leading dimension of arrays U, VT, DIFL, DIFR, +* POLES, GIVNUM, and Z. +* +* VT (input) REAL array, dimension ( LDU, SMLSIZ+1 ). +* On entry, VT' contains the right singular vector matrices of +* all subproblems at the bottom level. +* +* K (input) INTEGER array, dimension ( N ). +* +* DIFL (input) REAL array, dimension ( LDU, NLVL ). +* where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. +* +* DIFR (input) REAL array, dimension ( LDU, 2 * NLVL ). +* On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record +* distances between singular values on the I-th level and +* singular values on the (I -1)-th level, and DIFR(*, 2 * I) +* record the normalizing factors of the right singular vectors +* matrices of subproblems on I-th level. 
+* +* Z (input) REAL array, dimension ( LDU, NLVL ). +* On entry, Z(1, I) contains the components of the deflation- +* adjusted updating row vector for subproblems on the I-th +* level. +* +* POLES (input) REAL array, dimension ( LDU, 2 * NLVL ). +* On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old +* singular values involved in the secular equations on the I-th +* level. +* +* GIVPTR (input) INTEGER array, dimension ( N ). +* On entry, GIVPTR( I ) records the number of Givens +* rotations performed on the I-th problem on the computation +* tree. +* +* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). +* On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the +* locations of Givens rotations performed on the I-th level on +* the computation tree. +* +* LDGCOL (input) INTEGER, LDGCOL = > N. +* The leading dimension of arrays GIVCOL and PERM. +* +* PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). +* On entry, PERM(*, I) records permutations done on the I-th +* level of the computation tree. +* +* GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ). +* On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- +* values of Givens rotations performed on the I-th level on the +* computation tree. +* +* C (input) REAL array, dimension ( N ). +* On entry, if the I-th subproblem is not square, +* C( I ) contains the C-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* S (input) REAL array, dimension ( N ). +* On entry, if the I-th subproblem is not square, +* S( I ) contains the S-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* RWORK (workspace) REAL array, dimension at least +* max ( N, (SMLSZ+1)*NRHS*3 ). +* +* IWORK (workspace) INTEGER array. +* The dimension must be at least 3 * N +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. 
+* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL, + $ JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML, + $ NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLALS0, SGEMM, SLASDT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.SMLSIZ ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( LDB.LT.N ) THEN + INFO = -6 + ELSE IF( LDBX.LT.N ) THEN + INFO = -8 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLALSA', -INFO ) + RETURN + END IF +* +* Book-keeping and setting up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N +* + CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* The following code applies back the left singular vector factors. +* For applying back the right singular vector factors, go to 170. +* + IF( ICOMPQ.EQ.1 ) THEN + GO TO 170 + END IF +* +* The nodes on the bottom level of the tree were solved +* by SLASDQ. The corresponding left and right singular vector +* matrices are in explicit form. First apply back the left +* singular vector matrices. 
+* + NDB1 = ( ND+1 ) / 2 + DO 130 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 +* +* Since B and BX are complex, the following call to SGEMM +* is performed in two steps (real and imaginary parts). +* +* CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, +* $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +* + J = NL*NRHS*2 + DO 20 JCOL = 1, NRHS + DO 10 JROW = NLF, NLF + NL - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 10 CONTINUE + 20 CONTINUE + CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL ) + J = NL*NRHS*2 + DO 40 JCOL = 1, NRHS + DO 30 JROW = NLF, NLF + NL - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 30 CONTINUE + 40 CONTINUE + CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ), + $ NL ) + JREAL = 0 + JIMAG = NL*NRHS + DO 60 JCOL = 1, NRHS + DO 50 JROW = NLF, NLF + NL - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 50 CONTINUE + 60 CONTINUE +* +* Since B and BX are complex, the following call to SGEMM +* is performed in two steps (real and imaginary parts). 
+* +* CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, +* $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +* + J = NR*NRHS*2 + DO 80 JCOL = 1, NRHS + DO 70 JROW = NRF, NRF + NR - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 70 CONTINUE + 80 CONTINUE + CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR ) + J = NR*NRHS*2 + DO 100 JCOL = 1, NRHS + DO 90 JROW = NRF, NRF + NR - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 90 CONTINUE + 100 CONTINUE + CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ), + $ NR ) + JREAL = 0 + JIMAG = NR*NRHS + DO 120 JCOL = 1, NRHS + DO 110 JROW = NRF, NRF + NR - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 110 CONTINUE + 120 CONTINUE +* + 130 CONTINUE +* +* Next copy the rows of B that correspond to unchanged rows +* in the bidiagonal matrix to BX. +* + DO 140 I = 1, ND + IC = IWORK( INODE+I-1 ) + CALL CCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX ) + 140 CONTINUE +* +* Finally go through the left singular vector matrices of all +* the other subproblems bottom-up on the tree. 
+* + J = 2**NLVL + SQRE = 0 +* + DO 160 LVL = NLVL, 1, -1 + LVL2 = 2*LVL - 1 +* +* find the first node LF and last node LL on +* the current level LVL +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 150 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + J = J - 1 + CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX, + $ B( NLF, 1 ), LDB, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK, + $ INFO ) + 150 CONTINUE + 160 CONTINUE + GO TO 330 +* +* ICOMPQ = 1: applying back the right singular vector factors. +* + 170 CONTINUE +* +* First now go through the right singular vector matrices of all +* the tree nodes top-down. +* + J = 0 + DO 190 LVL = 1, NLVL + LVL2 = 2*LVL - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 180 I = LL, LF, -1 + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQRE = 0 + ELSE + SQRE = 1 + END IF + J = J + 1 + CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB, + $ BX( NLF, 1 ), LDBX, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK, + $ INFO ) + 180 CONTINUE + 190 CONTINUE +* +* The nodes on the bottom level of the tree were solved +* by SLASDQ. The corresponding right singular vector +* matrices are in explicit form. Apply them back. 
+* + NDB1 = ( ND+1 ) / 2 + DO 320 I = NDB1, ND + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLP1 = NL + 1 + IF( I.EQ.ND ) THEN + NRP1 = NR + ELSE + NRP1 = NR + 1 + END IF + NLF = IC - NL + NRF = IC + 1 +* +* Since B and BX are complex, the following call to SGEMM is +* performed in two steps (real and imaginary parts). +* +* CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, +* $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +* + J = NLP1*NRHS*2 + DO 210 JCOL = 1, NRHS + DO 200 JROW = NLF, NLF + NLP1 - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 200 CONTINUE + 210 CONTINUE + CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ), + $ NLP1 ) + J = NLP1*NRHS*2 + DO 230 JCOL = 1, NRHS + DO 220 JROW = NLF, NLF + NLP1 - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 220 CONTINUE + 230 CONTINUE + CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, + $ RWORK( 1+NLP1*NRHS ), NLP1 ) + JREAL = 0 + JIMAG = NLP1*NRHS + DO 250 JCOL = 1, NRHS + DO 240 JROW = NLF, NLF + NLP1 - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 240 CONTINUE + 250 CONTINUE +* +* Since B and BX are complex, the following call to SGEMM is +* performed in two steps (real and imaginary parts). 
+* +* CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, +* $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +* + J = NRP1*NRHS*2 + DO 270 JCOL = 1, NRHS + DO 260 JROW = NRF, NRF + NRP1 - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 260 CONTINUE + 270 CONTINUE + CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ), + $ NRP1 ) + J = NRP1*NRHS*2 + DO 290 JCOL = 1, NRHS + DO 280 JROW = NRF, NRF + NRP1 - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 280 CONTINUE + 290 CONTINUE + CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, + $ RWORK( 1+NRP1*NRHS ), NRP1 ) + JREAL = 0 + JIMAG = NRP1*NRHS + DO 310 JCOL = 1, NRHS + DO 300 JROW = NRF, NRF + NRP1 - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 300 CONTINUE + 310 CONTINUE +* + 320 CONTINUE +* + 330 CONTINUE +* + RETURN +* +* End of CLALSA +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clalsd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clalsd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,596 @@ + SUBROUTINE CLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, + $ RANK, WORK, RWORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), RWORK( * ) + COMPLEX B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CLALSD uses the singular value decomposition of A to solve the least +* squares problem of finding X to minimize the Euclidean norm of each +* column of A*X-B, where A is N-by-N upper bidiagonal, and X and B +* are N-by-NRHS. The solution X overwrites B. 
+* +* The singular values of A smaller than RCOND times the largest +* singular value are treated as zero in solving the least squares +* problem; in this case a minimum norm solution is returned. +* The actual singular values are returned in D in ascending order. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': D and E define an upper bidiagonal matrix. +* = 'L': D and E define a lower bidiagonal matrix. +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The dimension of the bidiagonal matrix. N >= 0. +* +* NRHS (input) INTEGER +* The number of columns of B. NRHS must be at least 1. +* +* D (input/output) REAL array, dimension (N) +* On entry D contains the main diagonal of the bidiagonal +* matrix. On exit, if INFO = 0, D contains its singular values. +* +* E (input/output) REAL array, dimension (N-1) +* Contains the super-diagonal entries of the bidiagonal matrix. +* On exit, E has been destroyed. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On input, B contains the right hand sides of the least +* squares problem. On output, B contains the solution X. +* +* LDB (input) INTEGER +* The leading dimension of B in the calling subprogram. +* LDB must be at least max(1,N). +* +* RCOND (input) REAL +* The singular values of A less than or equal to RCOND times +* the largest singular value are treated as zero in solving +* the least squares problem. If RCOND is negative, +* machine precision is used instead. 
+* For example, if diag(S)*X=B were the least squares problem, +* where diag(S) is a diagonal matrix of singular values, the +* solution would be X(i) = B(i) / S(i) if S(i) is greater than +* RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to +* RCOND*max(S). +* +* RANK (output) INTEGER +* The number of singular values of A greater than RCOND times +* the largest singular value. +* +* WORK (workspace) COMPLEX array, dimension (N * NRHS). +* +* RWORK (workspace) REAL array, dimension at least +* (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + (SMLSIZ+1)**2), +* where +* NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +* +* IWORK (workspace) INTEGER array, dimension (3*N*NLVL + 11*N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an singular value while +* working on the submatrix lying in rows and columns +* INFO/(N+1) through MOD(INFO,N+1). +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + INTEGER BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM, + $ GIVPTR, I, ICMPQ1, ICMPQ2, IRWB, IRWIB, IRWRB, + $ IRWU, IRWVT, IRWWRK, IWK, J, JCOL, JIMAG, + $ JREAL, JROW, K, NLVL, NM1, NRWORK, NSIZE, NSUB, + $ PERM, POLES, S, SIZEI, SMLSZP, SQRE, ST, ST1, + $ U, VT, Z + REAL CS, EPS, ORGNRM, R, RCND, SN, TOL +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SLANST + EXTERNAL ISAMAX, SLAMCH, SLANST +* .. +* .. External Subroutines .. 
+ EXTERNAL CCOPY, CLACPY, CLALSA, CLASCL, CLASET, CSROT, + $ SGEMM, SLARTG, SLASCL, SLASDA, SLASDQ, SLASET, + $ SLASRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, INT, LOG, REAL, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLALSD', -INFO ) + RETURN + END IF +* + EPS = SLAMCH( 'Epsilon' ) +* +* Set up the tolerance. +* + IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN + RCND = EPS + ELSE + RCND = RCOND + END IF +* + RANK = 0 +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + IF( D( 1 ).EQ.ZERO ) THEN + CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, B, LDB ) + ELSE + RANK = 1 + CALL CLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO ) + D( 1 ) = ABS( D( 1 ) ) + END IF + RETURN + END IF +* +* Rotate the matrix if it is lower bidiagonal. +* + IF( UPLO.EQ.'L' ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( NRHS.EQ.1 ) THEN + CALL CSROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN ) + ELSE + RWORK( I*2-1 ) = CS + RWORK( I*2 ) = SN + END IF + 10 CONTINUE + IF( NRHS.GT.1 ) THEN + DO 30 I = 1, NRHS + DO 20 J = 1, N - 1 + CS = RWORK( J*2-1 ) + SN = RWORK( J*2 ) + CALL CSROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN ) + 20 CONTINUE + 30 CONTINUE + END IF + END IF +* +* Scale. +* + NM1 = N - 1 + ORGNRM = SLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) THEN + CALL CLASET( 'A', N, NRHS, CZERO, CZERO, B, LDB ) + RETURN + END IF +* + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO ) +* +* If N is smaller than the minimum divide size SMLSIZ, then solve +* the problem with another solver. 
+* + IF( N.LE.SMLSIZ ) THEN + IRWU = 1 + IRWVT = IRWU + N*N + IRWWRK = IRWVT + N*N + IRWRB = IRWWRK + IRWIB = IRWRB + N*NRHS + IRWB = IRWIB + N*NRHS + CALL SLASET( 'A', N, N, ZERO, ONE, RWORK( IRWU ), N ) + CALL SLASET( 'A', N, N, ZERO, ONE, RWORK( IRWVT ), N ) + CALL SLASDQ( 'U', 0, N, N, N, 0, D, E, RWORK( IRWVT ), N, + $ RWORK( IRWU ), N, RWORK( IRWWRK ), 1, + $ RWORK( IRWWRK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* In the real version, B is passed to SLASDQ and multiplied +* internally by Q'. Here B is complex and that product is +* computed below in two steps (real and imaginary parts). +* + J = IRWB - 1 + DO 50 JCOL = 1, NRHS + DO 40 JROW = 1, N + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 40 CONTINUE + 50 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N ) + J = IRWB - 1 + DO 70 JCOL = 1, NRHS + DO 60 JROW = 1, N + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 60 CONTINUE + 70 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 90 JCOL = 1, NRHS + DO 80 JROW = 1, N + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), RWORK( JIMAG ) ) + 80 CONTINUE + 90 CONTINUE +* + TOL = RCND*ABS( D( ISAMAX( N, D, 1 ) ) ) + DO 100 I = 1, N + IF( D( I ).LE.TOL ) THEN + CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + ELSE + CALL CLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ), + $ LDB, INFO ) + RANK = RANK + 1 + END IF + 100 CONTINUE +* +* Since B is complex, the following call to SGEMM is performed +* in two steps (real and imaginary parts). That is for V * B +* (in the real version of the code V' is stored in WORK). 
+* +* CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, +* $ WORK( NWORK ), N ) +* + J = IRWB - 1 + DO 120 JCOL = 1, NRHS + DO 110 JROW = 1, N + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 110 CONTINUE + 120 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N ) + J = IRWB - 1 + DO 140 JCOL = 1, NRHS + DO 130 JROW = 1, N + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 130 CONTINUE + 140 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 160 JCOL = 1, NRHS + DO 150 JROW = 1, N + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), RWORK( JIMAG ) ) + 150 CONTINUE + 160 CONTINUE +* +* Unscale. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL SLASRT( 'D', N, D, INFO ) + CALL CLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN + END IF +* +* Book-keeping and setting up some constants. +* + NLVL = INT( LOG( REAL( N ) / REAL( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1 +* + SMLSZP = SMLSIZ + 1 +* + U = 1 + VT = 1 + SMLSIZ*N + DIFL = VT + SMLSZP*N + DIFR = DIFL + NLVL*N + Z = DIFR + NLVL*N*2 + C = Z + NLVL*N + S = C + N + POLES = S + N + GIVNUM = POLES + 2*NLVL*N + NRWORK = GIVNUM + 2*NLVL*N + BX = 1 +* + IRWRB = NRWORK + IRWIB = IRWRB + SMLSIZ*NRHS + IRWB = IRWIB + SMLSIZ*NRHS +* + SIZEI = 1 + N + K = SIZEI + N + GIVPTR = K + N + PERM = GIVPTR + N + GIVCOL = PERM + NLVL*N + IWK = GIVCOL + NLVL*N*2 +* + ST = 1 + SQRE = 0 + ICMPQ1 = 1 + ICMPQ2 = 0 + NSUB = 0 +* + DO 170 I = 1, N + IF( ABS( D( I ) ).LT.EPS ) THEN + D( I ) = SIGN( EPS, D( I ) ) + END IF + 170 CONTINUE +* + DO 240 I = 1, NM1 + IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN + NSUB = NSUB + 1 + IWORK( NSUB ) = ST +* +* Subproblem found. First determine its size and then +* apply divide and conquer on it. 
+* + IF( I.LT.NM1 ) THEN +* +* A subproblem with E(I) small for I < NM1. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE IF( ABS( E( I ) ).GE.EPS ) THEN +* +* A subproblem with E(NM1) not too small but I = NM1. +* + NSIZE = N - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE +* +* A subproblem with E(NM1) small. This implies an +* 1-by-1 subproblem at D(N), which is not solved +* explicitly. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + NSUB = NSUB + 1 + IWORK( NSUB ) = N + IWORK( SIZEI+NSUB-1 ) = 1 + CALL CCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N ) + END IF + ST1 = ST - 1 + IF( NSIZE.EQ.1 ) THEN +* +* This is a 1-by-1 subproblem and is not solved +* explicitly. +* + CALL CCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* This is a small subproblem and is solved by SLASDQ. +* + CALL SLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ RWORK( VT+ST1 ), N ) + CALL SLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ RWORK( U+ST1 ), N ) + CALL SLASDQ( 'U', 0, NSIZE, NSIZE, NSIZE, 0, D( ST ), + $ E( ST ), RWORK( VT+ST1 ), N, RWORK( U+ST1 ), + $ N, RWORK( NRWORK ), 1, RWORK( NRWORK ), + $ INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* In the real version, B is passed to SLASDQ and multiplied +* internally by Q'. Here B is complex and that product is +* computed below in two steps (real and imaginary parts). 
+* + J = IRWB - 1 + DO 190 JCOL = 1, NRHS + DO 180 JROW = ST, ST + NSIZE - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 180 CONTINUE + 190 CONTINUE + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE, + $ ZERO, RWORK( IRWRB ), NSIZE ) + J = IRWB - 1 + DO 210 JCOL = 1, NRHS + DO 200 JROW = ST, ST + NSIZE - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 200 CONTINUE + 210 CONTINUE + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE, + $ ZERO, RWORK( IRWIB ), NSIZE ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 230 JCOL = 1, NRHS + DO 220 JROW = ST, ST + NSIZE - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 220 CONTINUE + 230 CONTINUE +* + CALL CLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB, + $ WORK( BX+ST1 ), N ) + ELSE +* +* A large problem. Solve it using divide and conquer. +* + CALL SLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ), + $ E( ST ), RWORK( U+ST1 ), N, RWORK( VT+ST1 ), + $ IWORK( K+ST1 ), RWORK( DIFL+ST1 ), + $ RWORK( DIFR+ST1 ), RWORK( Z+ST1 ), + $ RWORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ), + $ IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ), + $ RWORK( GIVNUM+ST1 ), RWORK( C+ST1 ), + $ RWORK( S+ST1 ), RWORK( NRWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + BXST = BX + ST1 + CALL CLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ), + $ LDB, WORK( BXST ), N, RWORK( U+ST1 ), N, + $ RWORK( VT+ST1 ), IWORK( K+ST1 ), + $ RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ), + $ RWORK( Z+ST1 ), RWORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ), + $ RWORK( C+ST1 ), RWORK( S+ST1 ), + $ RWORK( NRWORK ), IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + ST = I + 1 + END IF + 240 CONTINUE +* +* Apply the singular values and treat the tiny ones as zero. 
+* + TOL = RCND*ABS( D( ISAMAX( N, D, 1 ) ) ) +* + DO 250 I = 1, N +* +* Some of the elements in D can be negative because 1-by-1 +* subproblems were not solved explicitly. +* + IF( ABS( D( I ) ).LE.TOL ) THEN + CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, WORK( BX+I-1 ), N ) + ELSE + RANK = RANK + 1 + CALL CLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, + $ WORK( BX+I-1 ), N, INFO ) + END IF + D( I ) = ABS( D( I ) ) + 250 CONTINUE +* +* Now apply back the right singular vectors. +* + ICMPQ2 = 1 + DO 320 I = 1, NSUB + ST = IWORK( I ) + ST1 = ST - 1 + NSIZE = IWORK( SIZEI+I-1 ) + BXST = BX + ST1 + IF( NSIZE.EQ.1 ) THEN + CALL CCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* Since B and BX are complex, the following call to SGEMM +* is performed in two steps (real and imaginary parts). +* +* CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, +* $ RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO, +* $ B( ST, 1 ), LDB ) +* + J = BXST - N - 1 + JREAL = IRWB - 1 + DO 270 JCOL = 1, NRHS + J = J + N + DO 260 JROW = 1, NSIZE + JREAL = JREAL + 1 + RWORK( JREAL ) = REAL( WORK( J+JROW ) ) + 260 CONTINUE + 270 CONTINUE + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO, + $ RWORK( IRWRB ), NSIZE ) + J = BXST - N - 1 + JIMAG = IRWB - 1 + DO 290 JCOL = 1, NRHS + J = J + N + DO 280 JROW = 1, NSIZE + JIMAG = JIMAG + 1 + RWORK( JIMAG ) = AIMAG( WORK( J+JROW ) ) + 280 CONTINUE + 290 CONTINUE + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO, + $ RWORK( IRWIB ), NSIZE ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 310 JCOL = 1, NRHS + DO 300 JROW = ST, ST + NSIZE - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 300 CONTINUE + 310 CONTINUE + ELSE + CALL CLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N, + $ B( ST, 1 ), LDB, RWORK( U+ST1 ), N, + $ RWORK( VT+ST1 ), IWORK( K+ST1 ), + $ RWORK( DIFL+ST1 ), RWORK( 
DIFR+ST1 ), + $ RWORK( Z+ST1 ), RWORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ), + $ RWORK( C+ST1 ), RWORK( S+ST1 ), + $ RWORK( NRWORK ), IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + 320 CONTINUE +* +* Unscale and sort the singular values. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL SLASRT( 'D', N, D, INFO ) + CALL CLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN +* +* End of CLALSD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clange.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clange.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,145 @@ + REAL FUNCTION CLANGE( NORM, M, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLANGE returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex matrix A. +* +* Description +* =========== +* +* CLANGE returns the value +* +* CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANGE as described +* above. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. 
When M = 0, +* CLANGE is set to zero. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. When N = 0, +* CLANGE is set to zero. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The m by n matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* WORK (workspace) REAL array, dimension (MAX(1,LWORK)), +* where LWORK >= M when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, M + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, M + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, M + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). 
+* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL CLASSQ( M, A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANGE = VALUE + RETURN +* +* End of CLANGE +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clanhe.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clanhe.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,187 @@ + REAL FUNCTION CLANHE( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLANHE returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex hermitian matrix A. +* +* Description +* =========== +* +* CLANHE returns the value +* +* CLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANHE as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* hermitian matrix A is to be referenced. +* = 'U': Upper triangular part of A is referenced +* = 'L': Lower triangular part of A is referenced +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, CLANHE is +* set to zero. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The hermitian matrix A. 
If UPLO = 'U', the leading n by n +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading n by n lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. Note that the imaginary parts of the diagonal +* elements need not be set and are assumed to be zero. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* WORK (workspace) REAL array, dimension (MAX(1,LWORK)), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J - 1 + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + VALUE = MAX( VALUE, ABS( REAL( A( J, J ) ) ) ) + 20 CONTINUE + ELSE + DO 40 J = 1, N + VALUE = MAX( VALUE, ABS( REAL( A( J, J ) ) ) ) + DO 30 I = J + 1, N + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is hermitian). 
+* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( REAL( A( J, J ) ) ) + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( REAL( A( J, J ) ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL CLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL CLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + DO 130 I = 1, N + IF( REAL( A( I, I ) ).NE.ZERO ) THEN + ABSA = ABS( REAL( A( I, I ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANHE = VALUE + RETURN +* +* End of CLANHE +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clanhs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clanhs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,142 @@ + REAL FUNCTION CLANHS( NORM, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLANHS returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* Hessenberg matrix A. 
+* +* Description +* =========== +* +* CLANHS returns the value +* +* CLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANHS as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, CLANHS is +* set to zero. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The n by n upper Hessenberg matrix A; the part of A below the +* first sub-diagonal is not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* WORK (workspace) REAL array, dimension (MAX(1,LWORK)), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, MIN( N, J+1 ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). 
+* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, MIN( N, J+1 ) + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, MIN( N, J+1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL CLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANHS = VALUE + RETURN +* +* End of CLANHS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clantr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clantr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,277 @@ + REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLANTR returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* trapezoidal or triangular matrix A. +* +* Description +* =========== +* +* CLANTR returns the value +* +* CLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). 
Note that max(abs(A(i,j))) is not a consistent matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANTR as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower trapezoidal. +* = 'U': Upper trapezoidal +* = 'L': Lower trapezoidal +* Note that A is triangular instead of trapezoidal if M = N. +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A has unit diagonal. +* = 'N': Non-unit diagonal +* = 'U': Unit diagonal +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0, and if +* UPLO = 'U', M <= N. When M = 0, CLANTR is set to zero. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0, and if +* UPLO = 'L', N <= M. When N = 0, CLANTR is set to zero. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The trapezoidal matrix A (A is triangular if M = N). +* If UPLO = 'U', the leading m by n upper trapezoidal part of +* the array A contains the upper trapezoidal matrix, and the +* strictly lower triangular part of A is not referenced. +* If UPLO = 'L', the leading m by n lower trapezoidal part of +* the array A contains the lower trapezoidal matrix, and the +* strictly upper triangular part of A is not referenced. Note +* that when DIAG = 'U', the diagonal elements of A are not +* referenced and are assumed to be one. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* WORK (workspace) REAL array, dimension (MAX(1,LWORK)), +* where LWORK >= M when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. 
Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( M, J-1 ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J + 1, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = 1, MIN( M, J ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = J, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN + SUM = ONE + DO 90 I = 1, J - 1 + SUM = SUM + ABS( A( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = 1, MIN( M, J ) + SUM = SUM + ABS( A( I, J ) ) + 100 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = J + 1, M + SUM = SUM + ABS( A( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = J, M + SUM = SUM + ABS( A( I, J ) ) + 130 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). 
+* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, M + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, MIN( M, J-1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, M + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, MIN( M, J ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 220 I = N + 1, M + WORK( I ) = ZERO + 220 CONTINUE + DO 240 J = 1, N + DO 230 I = J + 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 230 CONTINUE + 240 CONTINUE + ELSE + DO 250 I = 1, M + WORK( I ) = ZERO + 250 CONTINUE + DO 270 J = 1, N + DO 260 I = J, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 260 CONTINUE + 270 CONTINUE + END IF + END IF + VALUE = ZERO + DO 280 I = 1, M + VALUE = MAX( VALUE, WORK( I ) ) + 280 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). 
+* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 290 J = 2, N + CALL CLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) + 290 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 300 J = 1, N + CALL CLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) + 300 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 310 J = 1, N + CALL CLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 320 J = 1, N + CALL CLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) + 320 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANTR = VALUE + RETURN +* +* End of CLANTR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/claqp2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/claqp2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,179 @@ + SUBROUTINE CLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER LDA, M, N, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL VN1( * ), VN2( * ) + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CLAQP2 computes a QR factorization with column pivoting of +* the block A(OFFSET+1:M,1:N). +* The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* OFFSET (input) INTEGER +* The number of rows of the matrix A that must be pivoted +* but no factorized. OFFSET >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. 
+* On exit, the upper triangle of block A(OFFSET+1:M,1:N) is +* the triangular factor obtained; the elements in block +* A(OFFSET+1:M,1:N) below the diagonal, together with the +* array TAU, represent the orthogonal matrix Q as a product of +* elementary reflectors. Block A(1:OFFSET,1:N) has been +* accordingly pivoted, but not factorized. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(i) = 0, +* the i-th column of A is a free column. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* VN1 (input/output) REAL array, dimension (N) +* The vector with the partial column norms. +* +* VN2 (input/output) REAL array, dimension (N) +* The vector with the exact column norms. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* Further Details +* =============== +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* Partial column norm updating strategy modified by +* Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +* University of Zagreb, Croatia. +* June 2006. +* For more details see LAPACK Working Note 176. +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + COMPLEX CONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MN, OFFPI, PVT + REAL TEMP, TEMP2, TOL3Z + COMPLEX AII +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFG, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, MIN, SQRT +* .. +* .. External Functions ..
+ INTEGER ISAMAX + REAL SCNRM2, SLAMCH + EXTERNAL ISAMAX, SCNRM2, SLAMCH +* .. +* .. Executable Statements .. +* + MN = MIN( M-OFFSET, N ) + TOL3Z = SQRT(SLAMCH('Epsilon')) +* +* Compute factorization. +* + DO 20 I = 1, MN +* + OFFPI = OFFSET + I +* +* Determine ith pivot column and swap if necessary. +* + PVT = ( I-1 ) + ISAMAX( N-I+1, VN1( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL CSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + VN1( PVT ) = VN1( I ) + VN2( PVT ) = VN2( I ) + END IF +* +* Generate elementary reflector H(i). +* + IF( OFFPI.LT.M ) THEN + CALL CLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1, + $ TAU( I ) ) + ELSE + CALL CLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i)' to A(offset+i:m,i+1:n) from the left. +* + AII = A( OFFPI, I ) + A( OFFPI, I ) = CONE + CALL CLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, + $ CONJG( TAU( I ) ), A( OFFPI, I+1 ), LDA, + $ WORK( 1 ) ) + A( OFFPI, I ) = AII + END IF +* +* Update partial column norms. +* + DO 10 J = I + 1, N + IF( VN1( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ONE - ( ABS( A( OFFPI, J ) ) / VN1( J ) )**2 + TEMP = MAX( TEMP, ZERO ) + TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + IF( OFFPI.LT.M ) THEN + VN1( J ) = SCNRM2( M-OFFPI, A( OFFPI+1, J ), 1 ) + VN2( J ) = VN1( J ) + ELSE + VN1( J ) = ZERO + VN2( J ) = ZERO + END IF + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 10 CONTINUE +* + 20 CONTINUE +* + RETURN +* +* End of CLAQP2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/claqps.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/claqps.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,271 @@ + SUBROUTINE CLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1, + $ VN2, AUXV, F, LDF ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. 
of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER KB, LDA, LDF, M, N, NB, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL VN1( * ), VN2( * ) + COMPLEX A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* CLAQPS computes a step of QR factorization with column pivoting +* of a complex M-by-N matrix A by using Blas-3. It tries to factorize +* NB columns from A starting from the row OFFSET+1, and updates all +* of the matrix with Blas-3 xGEMM. +* +* In some cases, due to catastrophic cancellations, it cannot +* factorize NB columns. Hence, the actual number of factorized +* columns is returned in KB. +* +* Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0 +* +* OFFSET (input) INTEGER +* The number of rows of A that have been factorized in +* previous steps. +* +* NB (input) INTEGER +* The number of columns to factorize. +* +* KB (output) INTEGER +* The number of columns actually factorized. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, block A(OFFSET+1:M,1:KB) is the triangular +* factor obtained and block A(1:OFFSET,1:N) has been +* accordingly pivoted, but no factorized. +* The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has +* been updated. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* JPVT(I) = K <==> Column K of the full matrix A has been +* permuted into position I in AP. +* +* TAU (output) COMPLEX array, dimension (KB) +* The scalar factors of the elementary reflectors. +* +* VN1 (input/output) REAL array, dimension (N) +* The vector with the partial column norms. 
+* +* VN2 (input/output) REAL array, dimension (N) +* The vector with the exact column norms. +* +* AUXV (input/output) COMPLEX array, dimension (NB) +* Auxiliary vector. +* +* F (input/output) COMPLEX array, dimension (LDF,NB) +* Matrix F' = L*Y'*A. +* +* LDF (input) INTEGER +* The leading dimension of the array F. LDF >= max(1,N). +* +* Further Details +* =============== +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* Partial column norm updating strategy modified by +* Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +* University of Zagreb, Croatia. +* June 2006. +* For more details see LAPACK Working Note 176. +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + COMPLEX CZERO, CONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, + $ CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER ITEMP, J, K, LASTRK, LSTICC, PVT, RK + REAL TEMP, TEMP2, TOL3Z + COMPLEX AKK +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGEMV, CLARFG, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, MIN, NINT, REAL, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SCNRM2, SLAMCH + EXTERNAL ISAMAX, SCNRM2, SLAMCH +* .. +* .. Executable Statements .. +* + LASTRK = MIN( M, N+OFFSET ) + LSTICC = 0 + K = 0 + TOL3Z = SQRT(SLAMCH('Epsilon')) +* +* Beginning of while loop. +* + 10 CONTINUE + IF( ( K.LT.NB ) .AND.
( LSTICC.EQ.0 ) ) THEN + K = K + 1 + RK = OFFSET + K +* +* Determine ith pivot column and swap if necessary +* + PVT = ( K-1 ) + ISAMAX( N-K+1, VN1( K ), 1 ) + IF( PVT.NE.K ) THEN + CALL CSWAP( M, A( 1, PVT ), 1, A( 1, K ), 1 ) + CALL CSWAP( K-1, F( PVT, 1 ), LDF, F( K, 1 ), LDF ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( K ) + JPVT( K ) = ITEMP + VN1( PVT ) = VN1( K ) + VN2( PVT ) = VN2( K ) + END IF +* +* Apply previous Householder reflectors to column K: +* A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. +* + IF( K.GT.1 ) THEN + DO 20 J = 1, K - 1 + F( K, J ) = CONJG( F( K, J ) ) + 20 CONTINUE + CALL CGEMV( 'No transpose', M-RK+1, K-1, -CONE, A( RK, 1 ), + $ LDA, F( K, 1 ), LDF, CONE, A( RK, K ), 1 ) + DO 30 J = 1, K - 1 + F( K, J ) = CONJG( F( K, J ) ) + 30 CONTINUE + END IF +* +* Generate elementary reflector H(k). +* + IF( RK.LT.M ) THEN + CALL CLARFG( M-RK+1, A( RK, K ), A( RK+1, K ), 1, TAU( K ) ) + ELSE + CALL CLARFG( 1, A( RK, K ), A( RK, K ), 1, TAU( K ) ) + END IF +* + AKK = A( RK, K ) + A( RK, K ) = CONE +* +* Compute Kth column of F: +* +* Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). +* + IF( K.LT.N ) THEN + CALL CGEMV( 'Conjugate transpose', M-RK+1, N-K, TAU( K ), + $ A( RK, K+1 ), LDA, A( RK, K ), 1, CZERO, + $ F( K+1, K ), 1 ) + END IF +* +* Padding F(1:K,K) with zeros. +* + DO 40 J = 1, K + F( J, K ) = CZERO + 40 CONTINUE +* +* Incremental updating of F: +* F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' +* *A(RK:M,K). +* + IF( K.GT.1 ) THEN + CALL CGEMV( 'Conjugate transpose', M-RK+1, K-1, -TAU( K ), + $ A( RK, 1 ), LDA, A( RK, K ), 1, CZERO, + $ AUXV( 1 ), 1 ) +* + CALL CGEMV( 'No transpose', N, K-1, CONE, F( 1, 1 ), LDF, + $ AUXV( 1 ), 1, CONE, F( 1, K ), 1 ) + END IF +* +* Update the current row of A: +* A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. 
+* + IF( K.LT.N ) THEN + CALL CGEMM( 'No transpose', 'Conjugate transpose', 1, N-K, + $ K, -CONE, A( RK, 1 ), LDA, F( K+1, 1 ), LDF, + $ CONE, A( RK, K+1 ), LDA ) + END IF +* +* Update partial column norms. +* + IF( RK.LT.LASTRK ) THEN + DO 50 J = K + 1, N + IF( VN1( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ABS( A( RK, J ) ) / VN1( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + VN2( J ) = REAL( LSTICC ) + LSTICC = J + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 50 CONTINUE + END IF +* + A( RK, K ) = AKK +* +* End of while loop. +* + GO TO 10 + END IF + KB = K + RK = OFFSET + KB +* +* Apply the block reflector to the rest of the matrix: +* A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - +* A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. +* + IF( KB.LT.MIN( N, M-OFFSET ) ) THEN + CALL CGEMM( 'No transpose', 'Conjugate transpose', M-RK, N-KB, + $ KB, -CONE, A( RK+1, 1 ), LDA, F( KB+1, 1 ), LDF, + $ CONE, A( RK+1, KB+1 ), LDA ) + END IF +* +* Recomputation of difficult columns. +* + 60 CONTINUE + IF( LSTICC.GT.0 ) THEN + ITEMP = NINT( VN2( LSTICC ) ) + VN1( LSTICC ) = SCNRM2( M-RK, A( RK+1, LSTICC ), 1 ) +* +* NOTE: The computation of VN1( LSTICC ) relies on the fact that +* SNRM2 does not fail on vectors with norm below the value of +* SQRT(DLAMCH('S')) +* + VN2( LSTICC ) = VN1( LSTICC ) + LSTICC = ITEMP + GO TO 60 + END IF +* + RETURN +* +* End of CLAQPS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/claqr0.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/claqr0.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,601 @@ + SUBROUTINE CLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. 
Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CLAQR0 computes the eigenvalues of a Hessenberg matrix H +* and, optionally, the matrices T and Z from the Schur decomposition +* H = Z T Z**H, where T is an upper triangular matrix (the +* Schur form), and Z is the unitary matrix of Schur vectors. +* +* Optionally Z may be postmultiplied into an input unitary +* matrix Q so that this routine can give the Schur factorization +* of a matrix A which has been reduced to the Hessenberg form H +* by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. +* +* Arguments +* ========= +* +* WANTT (input) LOGICAL +* = .TRUE. : the full Schur form T is required; +* = .FALSE.: only eigenvalues are required. +* +* WANTZ (input) LOGICAL +* = .TRUE. : the matrix of Schur vectors Z is required; +* = .FALSE.: Schur vectors are not required. +* +* N (input) INTEGER +* The order of the matrix H. N .GE. 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, +* H(ILO,ILO-1) is zero. ILO and IHI are normally set by a +* previous call to CGEBAL, and then passed to CGEHRD when the +* matrix output by CGEBAL is reduced to Hessenberg form. +* Otherwise, ILO and IHI should be set to 1 and N, +* respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +* If N = 0, then ILO = 1 and IHI = 0. +* +* H (input/output) COMPLEX array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if INFO = 0 and WANTT is .TRUE., then H +* contains the upper triangular matrix T from the Schur +* decomposition (the Schur form). If INFO = 0 and WANT is +* .FALSE., then the contents of H are unspecified on exit. +* (The output value of H when INFO.GT.0 is given under the +* description of INFO below.) 
+* +* This subroutine may explicitly set H(i,j) = 0 for i.GT.j and +* j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH .GE. max(1,N). +* +* W (output) COMPLEX array, dimension (N) +* The computed eigenvalues of H(ILO:IHI,ILO:IHI) are stored +* in W(ILO:IHI). If WANTT is .TRUE., then the eigenvalues are +* stored in the same order as on the diagonal of the Schur +* form returned in H, with W(i) = H(i,i). +* +* Z (input/output) COMPLEX array, dimension (LDZ,IHI) +* If WANTZ is .FALSE., then Z is not referenced. +* If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is +* replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the +* orthogonal Schur factor of H(ILO:IHI,ILO:IHI). +* (The output value of Z when INFO.GT.0 is given under +* the description of INFO below.) +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. if WANTZ is .TRUE. +* then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. +* +* WORK (workspace/output) COMPLEX array, dimension LWORK +* On exit, if LWORK = -1, WORK(1) returns an estimate of +* the optimal value for LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK .GE. max(1,N) +* is sufficient, but LWORK typically as large as 6*N may +* be required for optimal performance. A workspace query +* to determine the optimal workspace size is recommended. +* +* If LWORK = -1, then CLAQR0 does a workspace query. +* In this case, CLAQR0 checks the input parameters and +* estimates the optimal workspace size for the given +* values of N, ILO and IHI. The estimate is returned +* in WORK(1). No error message related to LWORK is +* issued by XERBLA. Neither H nor Z are accessed. +* +* +* INFO (output) INTEGER +* = 0: successful exit +* .GT. 0: if INFO = i, CLAQR0 failed to compute all of +* the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +* and WI contain those eigenvalues which have been +* successfully computed. (Failures are rare.) +* +* If INFO .GT. 
0 and WANT is .FALSE., then on exit, +* the remaining unconverged eigenvalues are the eigen- +* values of the upper Hessenberg matrix rows and +* columns ILO through INFO of the final, output +* value of H. +* +* If INFO .GT. 0 and WANTT is .TRUE., then on exit +* +* (*) (initial value of H)*U = U*(final value of H) +* +* where U is a unitary matrix. The final +* value of H is upper Hessenberg and triangular in +* rows and columns INFO+1 through IHI. +* +* If INFO .GT. 0 and WANTZ is .TRUE., then on exit +* +* (final value of Z(ILO:IHI,ILOZ:IHIZ) +* = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U +* +* where U is the unitary matrix in (*) (regard- +* less of the value of WANTT.) +* +* If INFO .GT. 0 and WANTZ is .FALSE., then Z is not +* accessed. +* +* ================================================================ +* Based on contributions by +* Karen Braman and Ralph Byers, Department of Mathematics, +* University of Kansas, USA +* +* ================================================================ +* References: +* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +* Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +* Performance, SIAM Journal of Matrix Analysis, volume 23, pages +* 929--947, 2002. +* +* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +* Algorithm Part II: Aggressive Early Deflation, SIAM Journal +* of Matrix Analysis, volume 23, pages 948--973, 2002. +* +* ================================================================ +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . CLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== +* +* ==== Exceptional deflation windows: try to cure rare +* . slow convergence by increasing the size of the +* . deflation window after KEXNW iterations. ===== +* +* ==== Exceptional shifts: try to cure rare slow convergence +* . with ad-hoc exceptional shifts every KEXSH iterations. +* . 
The constants WILK1 and WILK2 are used to form the +* . exceptional shifts. ==== +* + INTEGER NTINY + PARAMETER ( NTINY = 11 ) + INTEGER KEXNW, KEXSH + PARAMETER ( KEXNW = 5, KEXSH = 6 ) + REAL WILK1 + PARAMETER ( WILK1 = 0.75e0 ) + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), + $ ONE = ( 1.0e0, 0.0e0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0e0 ) +* .. +* .. Local Scalars .. + COMPLEX AA, BB, CC, CDUM, DD, DET, RTDISC, SWAP, TR2 + REAL S + INTEGER I, INF, IT, ITMAX, K, KACC22, KBOT, KDU, KS, + $ KT, KTOP, KU, KV, KWH, KWTOP, KWV, LD, LS, + $ LWKOPT, NDFL, NH, NHO, NIBBLE, NMIN, NS, NSMAX, + $ NSR, NVE, NW, NWMAX, NWR + LOGICAL NWINC, SORTED + CHARACTER JBCMPZ*2 +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Local Arrays .. + COMPLEX ZDUM( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CLACPY, CLAHQR, CLAQR3, CLAQR4, CLAQR5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, INT, MAX, MIN, MOD, REAL, + $ SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. + INFO = 0 +* +* ==== Quick return for N = 0: nothing to do. ==== +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = ONE + RETURN + END IF +* +* ==== Set up job flags for ILAENV. ==== +* + IF( WANTT ) THEN + JBCMPZ( 1: 1 ) = 'S' + ELSE + JBCMPZ( 1: 1 ) = 'E' + END IF + IF( WANTZ ) THEN + JBCMPZ( 2: 2 ) = 'V' + ELSE + JBCMPZ( 2: 2 ) = 'N' + END IF +* +* ==== Tiny matrices must use CLAHQR. ==== +* + IF( N.LE.NTINY ) THEN +* +* ==== Estimate optimal workspace. ==== +* + LWKOPT = 1 + IF( LWORK.NE.-1 ) + $ CALL CLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, INFO ) + ELSE +* +* ==== Use small bulge multi-shift QR with aggressive early +* . deflation on larger-than-tiny matrices. ==== +* +* ==== Hope for the best. ==== +* + INFO = 0 +* +* ==== NWR = recommended deflation window size. At this +* . 
point, N .GT. NTINY = 11, so there is enough +* . subdiagonal workspace for NWR.GE.2 as required. +* . (In fact, there is enough subdiagonal space for +* . NWR.GE.3.) ==== +* + NWR = ILAENV( 13, 'CLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NWR = MAX( 2, NWR ) + NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR ) + NW = NWR +* +* ==== NSR = recommended number of simultaneous shifts. +* . At this point N .GT. NTINY = 11, so there is at +* . enough subdiagonal workspace for NSR to be even +* . and greater than or equal to two as required. ==== +* + NSR = ILAENV( 15, 'CLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NSR = MIN( NSR, ( N+6 ) / 9, IHI-ILO ) + NSR = MAX( 2, NSR-MOD( NSR, 2 ) ) +* +* ==== Estimate optimal workspace ==== +* +* ==== Workspace query call to CLAQR3 ==== +* + CALL CLAQR3( WANTT, WANTZ, N, ILO, IHI, NWR+1, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, W, H, LDH, N, H, LDH, N, H, + $ LDH, WORK, -1 ) +* +* ==== Optimal workspace = MAX(CLAQR5, CLAQR3) ==== +* + LWKOPT = MAX( 3*NSR / 2, INT( WORK( 1 ) ) ) +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = CMPLX( LWKOPT, 0 ) + RETURN + END IF +* +* ==== CLAHQR/CLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'CLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== Nibble crossover point ==== +* + NIBBLE = ILAENV( 14, 'CLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NIBBLE = MAX( 0, NIBBLE ) +* +* ==== Accumulate reflections during ttswp? Use block +* . 2-by-2 structure during matrix-matrix multiply? ==== +* + KACC22 = ILAENV( 16, 'CLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + KACC22 = MAX( 0, KACC22 ) + KACC22 = MIN( 2, KACC22 ) +* +* ==== NWMAX = the largest possible deflation window for +* . which there is sufficient workspace. ==== +* + NWMAX = MIN( ( N-1 ) / 3, LWORK / 2 ) +* +* ==== NSMAX = the Largest number of simultaneous shifts +* . for which there is sufficient workspace. 
==== +* + NSMAX = MIN( ( N+6 ) / 9, 2*LWORK / 3 ) + NSMAX = NSMAX - MOD( NSMAX, 2 ) +* +* ==== NDFL: an iteration count restarted at deflation. ==== +* + NDFL = 1 +* +* ==== ITMAX = iteration limit ==== +* + ITMAX = MAX( 30, 2*KEXSH )*MAX( 10, ( IHI-ILO+1 ) ) +* +* ==== Last row and column in the active block ==== +* + KBOT = IHI +* +* ==== Main Loop ==== +* + DO 70 IT = 1, ITMAX +* +* ==== Done when KBOT falls below ILO ==== +* + IF( KBOT.LT.ILO ) + $ GO TO 80 +* +* ==== Locate active block ==== +* + DO 10 K = KBOT, ILO + 1, -1 + IF( H( K, K-1 ).EQ.ZERO ) + $ GO TO 20 + 10 CONTINUE + K = ILO + 20 CONTINUE + KTOP = K +* +* ==== Select deflation window size ==== +* + NH = KBOT - KTOP + 1 + IF( NDFL.LT.KEXNW .OR. NH.LT.NW ) THEN +* +* ==== Typical deflation window. If possible and +* . advisable, nibble the entire active block. +* . If not, use size NWR or NWR+1 depending upon +* . which has the smaller corresponding subdiagonal +* . entry (a heuristic). ==== +* + NWINC = .TRUE. + IF( NH.LE.MIN( NMIN, NWMAX ) ) THEN + NW = NH + ELSE + NW = MIN( NWR, NH, NWMAX ) + IF( NW.LT.NWMAX ) THEN + IF( NW.GE.NH-1 ) THEN + NW = NH + ELSE + KWTOP = KBOT - NW + 1 + IF( CABS1( H( KWTOP, KWTOP-1 ) ).GT. + $ CABS1( H( KWTOP-1, KWTOP-2 ) ) )NW = NW + 1 + END IF + END IF + END IF + ELSE +* +* ==== Exceptional deflation window. If there have +* . been no deflations in KEXNW or more iterations, +* . then vary the deflation window size. At first, +* . because, larger windows are, in general, more +* . powerful than smaller ones, rapidly increase the +* . window up to the maximum reasonable and possible. +* . Then maybe try a slightly smaller window. ==== +* + IF( NWINC .AND. NW.LT.MIN( NWMAX, NH ) ) THEN + NW = MIN( NWMAX, NH, 2*NW ) + ELSE + NWINC = .FALSE. + IF( NW.EQ.NH .AND. NH.GT.2 ) + $ NW = NH - 1 + END IF + END IF +* +* ==== Aggressive early deflation: +* . split workspace under the subdiagonal into +* . - an nw-by-nw work array V in the lower +* . left-hand-corner, +* . 
- an NW-by-at-least-NW-but-more-is-better +* . (NW-by-NHO) horizontal work array along +* . the bottom edge, +* . - an at-least-NW-but-more-is-better (NHV-by-NW) +* . vertical work array along the left-hand-edge. +* . ==== +* + KV = N - NW + 1 + KT = NW + 1 + NHO = ( N-NW-1 ) - KT + 1 + KWV = NW + 2 + NVE = ( N-NW ) - KWV + 1 +* +* ==== Aggressive early deflation ==== +* + CALL CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, W, H( KV, 1 ), LDH, NHO, + $ H( KV, KT ), LDH, NVE, H( KWV, 1 ), LDH, WORK, + $ LWORK ) +* +* ==== Adjust KBOT accounting for new deflations. ==== +* + KBOT = KBOT - LD +* +* ==== KS points to the shifts. ==== +* + KS = KBOT - LS + 1 +* +* ==== Skip an expensive QR sweep if there is a (partly +* . heuristic) reason to expect that many eigenvalues +* . will deflate without it. Here, the QR sweep is +* . skipped if many eigenvalues have just been deflated +* . or if the remaining active block is small. +* + IF( ( LD.EQ.0 ) .OR. ( ( 100*LD.LE.NW*NIBBLE ) .AND. ( KBOT- + $ KTOP+1.GT.MIN( NMIN, NWMAX ) ) ) ) THEN +* +* ==== NS = nominal number of simultaneous shifts. +* . This may be lowered (slightly) if CLAQR3 +* . did not provide that many shifts. ==== +* + NS = MIN( NSMAX, NSR, MAX( 2, KBOT-KTOP ) ) + NS = NS - MOD( NS, 2 ) +* +* ==== If there have been no deflations +* . in a multiple of KEXSH iterations, +* . then try exceptional shifts. +* . Otherwise use shifts provided by +* . CLAQR3 above or from the eigenvalues +* . of a trailing principal submatrix. ==== +* + IF( MOD( NDFL, KEXSH ).EQ.0 ) THEN + KS = KBOT - NS + 1 + DO 30 I = KBOT, KS + 1, -2 + W( I ) = H( I, I ) + WILK1*CABS1( H( I, I-1 ) ) + W( I-1 ) = W( I ) + 30 CONTINUE + ELSE +* +* ==== Got NS/2 or fewer shifts? Use CLAQR4 or +* . CLAHQR on a trailing principal submatrix to +* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, +* . there is enough space below the subdiagonal +* . to fit an NS-by-NS scratch array.) 
==== +* + IF( KBOT-KS+1.LE.NS / 2 ) THEN + KS = KBOT - NS + 1 + KT = N - NS + 1 + CALL CLACPY( 'A', NS, NS, H( KS, KS ), LDH, + $ H( KT, 1 ), LDH ) + IF( NS.GT.NMIN ) THEN + CALL CLAQR4( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, W( KS ), 1, 1, + $ ZDUM, 1, WORK, LWORK, INF ) + ELSE + CALL CLAHQR( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, W( KS ), 1, 1, + $ ZDUM, 1, INF ) + END IF + KS = KS + INF +* +* ==== In case of a rare QR failure use +* . eigenvalues of the trailing 2-by-2 +* . principal submatrix. Scale to avoid +* . overflows, underflows and subnormals. +* . (The scale factor S can not be zero, +* . because H(KBOT,KBOT-1) is nonzero.) ==== +* + IF( KS.GE.KBOT ) THEN + S = CABS1( H( KBOT-1, KBOT-1 ) ) + + $ CABS1( H( KBOT, KBOT-1 ) ) + + $ CABS1( H( KBOT-1, KBOT ) ) + + $ CABS1( H( KBOT, KBOT ) ) + AA = H( KBOT-1, KBOT-1 ) / S + CC = H( KBOT, KBOT-1 ) / S + BB = H( KBOT-1, KBOT ) / S + DD = H( KBOT, KBOT ) / S + TR2 = ( AA+DD ) / TWO + DET = ( AA-TR2 )*( DD-TR2 ) - BB*CC + RTDISC = SQRT( -DET ) + W( KBOT-1 ) = ( TR2+RTDISC )*S + W( KBOT ) = ( TR2-RTDISC )*S +* + KS = KBOT - 1 + END IF + END IF +* + IF( KBOT-KS+1.GT.NS ) THEN +* +* ==== Sort the shifts (Helps a little) ==== +* + SORTED = .false. + DO 50 K = KBOT, KS + 1, -1 + IF( SORTED ) + $ GO TO 60 + SORTED = .true. + DO 40 I = KS, K - 1 + IF( CABS1( W( I ) ).LT.CABS1( W( I+1 ) ) ) + $ THEN + SORTED = .false. + SWAP = W( I ) + W( I ) = W( I+1 ) + W( I+1 ) = SWAP + END IF + 40 CONTINUE + 50 CONTINUE + 60 CONTINUE + END IF + END IF +* +* ==== If there are only two shifts, then use +* . only one. ==== +* + IF( KBOT-KS+1.EQ.2 ) THEN + IF( CABS1( W( KBOT )-H( KBOT, KBOT ) ).LT. + $ CABS1( W( KBOT-1 )-H( KBOT, KBOT ) ) ) THEN + W( KBOT-1 ) = W( KBOT ) + ELSE + W( KBOT ) = W( KBOT-1 ) + END IF + END IF +* +* ==== Use up to NS of the the smallest magnatiude +* . shifts. If there aren't NS shifts available, +* . then use them all, possibly dropping one to +* . make the number of shifts even. 
==== +* + NS = MIN( NS, KBOT-KS+1 ) + NS = NS - MOD( NS, 2 ) + KS = KBOT - NS + 1 +* +* ==== Small-bulge multi-shift QR sweep: +* . split workspace under the subdiagonal into +* . - a KDU-by-KDU work array U in the lower +* . left-hand-corner, +* . - a KDU-by-at-least-KDU-but-more-is-better +* . (KDU-by-NHo) horizontal work array WH along +* . the bottom edge, +* . - and an at-least-KDU-but-more-is-better-by-KDU +* . (NVE-by-KDU) vertical work WV arrow along +* . the left-hand-edge. ==== +* + KDU = 3*NS - 3 + KU = N - KDU + 1 + KWH = KDU + 1 + NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1 + KWV = KDU + 4 + NVE = N - KDU - KWV + 1 +* +* ==== Small-bulge multi-shift QR sweep ==== +* + CALL CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NS, + $ W( KS ), H, LDH, ILOZ, IHIZ, Z, LDZ, WORK, + $ 3, H( KU, 1 ), LDH, NVE, H( KWV, 1 ), LDH, + $ NHO, H( KU, KWH ), LDH ) + END IF +* +* ==== Note progress (or the lack of it). ==== +* + IF( LD.GT.0 ) THEN + NDFL = 1 + ELSE + NDFL = NDFL + 1 + END IF +* +* ==== End of main loop ==== + 70 CONTINUE +* +* ==== Iteration limit exceeded. Set INFO to show where +* . the problem occurred and exit. ==== +* + INFO = KBOT + 80 CONTINUE + END IF +* +* ==== Return the optimal value of LWORK. ==== +* + WORK( 1 ) = CMPLX( LWKOPT, 0 ) +* +* ==== End of CLAQR0 ==== +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/claqr1.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/claqr1.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,97 @@ + SUBROUTINE CLAQR1( N, H, LDH, S1, S2, V ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + COMPLEX S1, S2 + INTEGER LDH, N +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), V( * ) +* .. +* +* Given a 2-by-2 or 3-by-3 matrix H, CLAQR1 sets v to a +* scalar multiple of the first column of the product +* +* (*) K = (H - s1*I)*(H - s2*I) +* +* scaling to avoid overflows and most underflows. 
+* +* This is useful for starting double implicit shift bulges +* in the QR algorithm. +* +* +* N (input) integer +* Order of the matrix H. N must be either 2 or 3. +* +* H (input) COMPLEX array of dimension (LDH,N) +* The 2-by-2 or 3-by-3 matrix H in (*). +* +* LDH (input) integer +* The leading dimension of H as declared in +* the calling procedure. LDH.GE.N +* +* S1 (input) COMPLEX +* S2 S1 and S2 are the shifts defining K in (*) above. +* +* V (output) COMPLEX array of dimension N +* A scalar multiple of the first column of the +* matrix K in (*). +* +* ================================================================ +* Based on contributions by +* Karen Braman and Ralph Byers, Department of Mathematics, +* University of Kansas, USA +* +* ================================================================ +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ) ) + REAL RZERO + PARAMETER ( RZERO = 0.0e0 ) +* .. +* .. Local Scalars .. + COMPLEX CDUM + REAL H21S, H31S, S +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. 
+ IF( N.EQ.2 ) THEN + S = CABS1( H( 1, 1 )-S2 ) + CABS1( H( 2, 1 ) ) + IF( S.EQ.RZERO ) THEN + V( 1 ) = ZERO + V( 2 ) = ZERO + ELSE + H21S = H( 2, 1 ) / S + V( 1 ) = H21S*H( 1, 2 ) + ( H( 1, 1 )-S1 )* + $ ( ( H( 1, 1 )-S2 ) / S ) + V( 2 ) = H21S*( H( 1, 1 )+H( 2, 2 )-S1-S2 ) + END IF + ELSE + S = CABS1( H( 1, 1 )-S2 ) + CABS1( H( 2, 1 ) ) + + $ CABS1( H( 3, 1 ) ) + IF( S.EQ.ZERO ) THEN + V( 1 ) = ZERO + V( 2 ) = ZERO + V( 3 ) = ZERO + ELSE + H21S = H( 2, 1 ) / S + H31S = H( 3, 1 ) / S + V( 1 ) = ( H( 1, 1 )-S1 )*( ( H( 1, 1 )-S2 ) / S ) + + $ H( 1, 2 )*H21S + H( 1, 3 )*H31S + V( 2 ) = H21S*( H( 1, 1 )+H( 2, 2 )-S1-S2 ) + H( 2, 3 )*H31S + V( 3 ) = H31S*( H( 1, 1 )+H( 3, 3 )-S1-S2 ) + H21S*H( 3, 2 ) + END IF + END IF + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/claqr2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/claqr2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,438 @@ + SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, + $ NV, WV, LDWV, WORK, LWORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, + $ LDZ, LWORK, N, ND, NH, NS, NV, NW + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ), + $ WORK( * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* This subroutine is identical to CLAQR3 except that it avoids +* recursion by calling CLAHQR instead of CLAQR4. +* +* +* ****************************************************************** +* Aggressive early deflation: +* +* This subroutine accepts as input an upper Hessenberg matrix +* H and performs an unitary similarity transformation +* designed to detect and deflate fully converged eigenvalues from +* a trailing principal submatrix. 
On output H has been over- +* written by a new Hessenberg matrix that is a perturbation of +* an unitary similarity transformation of H. It is to be +* hoped that the final version of H has many zero subdiagonal +* entries. +* +* ****************************************************************** +* WANTT (input) LOGICAL +* If .TRUE., then the Hessenberg matrix H is fully updated +* so that the triangular Schur factor may be +* computed (in cooperation with the calling subroutine). +* If .FALSE., then only enough of H is updated to preserve +* the eigenvalues. +* +* WANTZ (input) LOGICAL +* If .TRUE., then the unitary matrix Z is updated so +* so that the unitary Schur factor may be computed +* (in cooperation with the calling subroutine). +* If .FALSE., then Z is not referenced. +* +* N (input) INTEGER +* The order of the matrix H and (if WANTZ is .TRUE.) the +* order of the unitary matrix Z. +* +* KTOP (input) INTEGER +* It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. +* KBOT and KTOP together determine an isolated block +* along the diagonal of the Hessenberg matrix. +* +* KBOT (input) INTEGER +* It is assumed without a check that either +* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together +* determine an isolated block along the diagonal of the +* Hessenberg matrix. +* +* NW (input) INTEGER +* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). +* +* H (input/output) COMPLEX array, dimension (LDH,N) +* On input the initial N-by-N section of H stores the +* Hessenberg matrix undergoing aggressive early deflation. +* On output H has been transformed by a unitary +* similarity transformation, perturbed, and the returned +* to Hessenberg form that (it is to be hoped) has some +* zero subdiagonal entries. +* +* LDH (input) integer +* Leading dimension of H just as declared in the calling +* subroutine. N .LE. LDH +* +* ILOZ (input) INTEGER +* IHIZ (input) INTEGER +* Specify the rows of Z to which transformations must be +* applied if WANTZ is .TRUE.. 
1 .LE. ILOZ .LE. IHIZ .LE. N. +* +* Z (input/output) COMPLEX array, dimension (LDZ,IHI) +* IF WANTZ is .TRUE., then on output, the unitary +* similarity transformation mentioned above has been +* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. +* If WANTZ is .FALSE., then Z is unreferenced. +* +* LDZ (input) integer +* The leading dimension of Z just as declared in the +* calling subroutine. 1 .LE. LDZ. +* +* NS (output) integer +* The number of unconverged (ie approximate) eigenvalues +* returned in SR and SI that may be used as shifts by the +* calling subroutine. +* +* ND (output) integer +* The number of converged eigenvalues uncovered by this +* subroutine. +* +* SH (output) COMPLEX array, dimension KBOT +* On output, approximate eigenvalues that may +* be used for shifts are stored in SH(KBOT-ND-NS+1) +* through SR(KBOT-ND). Converged eigenvalues are +* stored in SH(KBOT-ND+1) through SH(KBOT). +* +* V (workspace) COMPLEX array, dimension (LDV,NW) +* An NW-by-NW work array. +* +* LDV (input) integer scalar +* The leading dimension of V just as declared in the +* calling subroutine. NW .LE. LDV +* +* NH (input) integer scalar +* The number of columns of T. NH.GE.NW. +* +* T (workspace) COMPLEX array, dimension (LDT,NW) +* +* LDT (input) integer +* The leading dimension of T just as declared in the +* calling subroutine. NW .LE. LDT +* +* NV (input) integer +* The number of rows of work array WV available for +* workspace. NV.GE.NW. +* +* WV (workspace) COMPLEX array, dimension (LDWV,NW) +* +* LDWV (input) integer +* The leading dimension of W just as declared in the +* calling subroutine. NW .LE. LDV +* +* WORK (workspace) COMPLEX array, dimension LWORK. +* On exit, WORK(1) is set to an estimate of the optimal value +* of LWORK for the given values of N, NW, KTOP and KBOT. +* +* LWORK (input) integer +* The dimension of the work array WORK. LWORK = 2*NW +* suffices, but greater efficiency may result from larger +* values of LWORK. 
+* +* If LWORK = -1, then a workspace query is assumed; CLAQR2 +* only estimates the optimal workspace size for the given +* values of N, NW, KTOP and KBOT. The estimate is returned +* in WORK(1). No error message related to LWORK is issued +* by XERBLA. Neither H nor Z are accessed. +* +* ================================================================ +* Based on contributions by +* Karen Braman and Ralph Byers, Department of Mathematics, +* University of Kansas, USA +* +* ================================================================== +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), + $ ONE = ( 1.0e0, 0.0e0 ) ) + REAL RZERO, RONE + PARAMETER ( RZERO = 0.0e0, RONE = 1.0e0 ) +* .. +* .. Local Scalars .. + COMPLEX BETA, CDUM, S, TAU + REAL FOO, SAFMAX, SAFMIN, SMLNUM, ULP + INTEGER I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN, + $ KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWKOPT +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEHRD, CGEMM, CLACPY, CLAHQR, CLARF, + $ CLARFG, CLASET, CTREXC, CUNGHR, SLABAD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, INT, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* ==== Estimate optimal workspace. ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + IF( JW.LE.2 ) THEN + LWKOPT = 1 + ELSE +* +* ==== Workspace query call to CGEHRD ==== +* + CALL CGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK1 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to CUNGHR ==== +* + CALL CUNGHR( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK2 = INT( WORK( 1 ) ) +* +* ==== Optimal workspace ==== +* + LWKOPT = JW + MAX( LWK1, LWK2 ) + END IF +* +* ==== Quick return in case of workspace query. 
==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = CMPLX( LWKOPT, 0 ) + RETURN + END IF +* +* ==== Nothing to do ... +* ... for an empty active block ... ==== + NS = 0 + ND = 0 + IF( KTOP.GT.KBOT ) + $ RETURN +* ... nor for an empty deflation window. ==== + IF( NW.LT.1 ) + $ RETURN +* +* ==== Machine constants ==== +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = RONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( N ) / ULP ) +* +* ==== Setup deflation window ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + KWTOP = KBOT - JW + 1 + IF( KWTOP.EQ.KTOP ) THEN + S = ZERO + ELSE + S = H( KWTOP, KWTOP-1 ) + END IF +* + IF( KBOT.EQ.KWTOP ) THEN +* +* ==== 1-by-1 deflation window: not much to do ==== +* + SH( KWTOP ) = H( KWTOP, KWTOP ) + NS = 1 + ND = 0 + IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP, + $ KWTOP ) ) ) ) THEN + + NS = 0 + ND = 1 + IF( KWTOP.GT.KTOP ) + $ H( KWTOP, KWTOP-1 ) = ZERO + END IF + RETURN + END IF +* +* ==== Convert to spike-triangular form. (In case of a +* . rare QR failure, this routine continues to do +* . aggressive early deflation using that part of +* . the deflation window that converged using INFQR +* . here and there to keep track.) ==== +* + CALL CLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) + CALL CCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) +* + CALL CLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) + CALL CLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, + $ JW, V, LDV, INFQR ) +* +* ==== Deflation detection loop ==== +* + NS = JW + ILST = INFQR + 1 + DO 10 KNT = INFQR + 1, JW +* +* ==== Small spike tip deflation test ==== +* + FOO = CABS1( T( NS, NS ) ) + IF( FOO.EQ.RZERO ) + $ FOO = CABS1( S ) + IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) + $ THEN +* +* ==== One more converged eigenvalue ==== +* + NS = NS - 1 + ELSE +* +* ==== One undflatable eigenvalue. Move it up out of the +* . way. (CTREXC can not fail in this case.) 
==== +* + IFST = NS + CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + ILST = ILST + 1 + END IF + 10 CONTINUE +* +* ==== Return to Hessenberg form ==== +* + IF( NS.EQ.0 ) + $ S = ZERO +* + IF( NS.LT.JW ) THEN +* +* ==== sorting the diagonal of T improves accuracy for +* . graded matrices. ==== +* + DO 30 I = INFQR + 1, NS + IFST = I + DO 20 J = I + 1, NS + IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) ) + $ IFST = J + 20 CONTINUE + ILST = I + IF( IFST.NE.ILST ) + $ CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + 30 CONTINUE + END IF +* +* ==== Restore shift/eigenvalue array from T ==== +* + DO 40 I = INFQR + 1, JW + SH( KWTOP+I-1 ) = T( I, I ) + 40 CONTINUE +* +* + IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN + IF( NS.GT.1 .AND. S.NE.ZERO ) THEN +* +* ==== Reflect spike back into lower triangle ==== +* + CALL CCOPY( NS, V, LDV, WORK, 1 ) + DO 50 I = 1, NS + WORK( I ) = CONJG( WORK( I ) ) + 50 CONTINUE + BETA = WORK( 1 ) + CALL CLARFG( NS, BETA, WORK( 2 ), 1, TAU ) + WORK( 1 ) = ONE +* + CALL CLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) +* + CALL CLARF( 'L', NS, JW, WORK, 1, CONJG( TAU ), T, LDT, + $ WORK( JW+1 ) ) + CALL CLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL CLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) +* + CALL CGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + END IF +* +* ==== Copy updated reduced window into place ==== +* + IF( KWTOP.GT.1 ) + $ H( KWTOP, KWTOP-1 ) = S*CONJG( V( 1, 1 ) ) + CALL CLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) + CALL CCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), + $ LDH+1 ) +* +* ==== Accumulate orthogonal matrix in order update +* . H and Z, if requested. (A modified version +* . of CUNGHR that accumulates block Householder +* . transformations into V directly might be +* . marginally more efficient than the following.) ==== +* + IF( NS.GT.1 .AND. 
S.NE.ZERO ) THEN + CALL CUNGHR( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + CALL CGEMM( 'N', 'N', JW, NS, NS, ONE, V, LDV, T, LDT, ZERO, + $ WV, LDWV ) + CALL CLACPY( 'A', JW, NS, WV, LDWV, V, LDV ) + END IF +* +* ==== Update vertical slab in H ==== +* + IF( WANTT ) THEN + LTOP = 1 + ELSE + LTOP = KTOP + END IF + DO 60 KROW = LTOP, KWTOP - 1, NV + KLN = MIN( NV, KWTOP-KROW ) + CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), + $ LDH, V, LDV, ZERO, WV, LDWV ) + CALL CLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + 60 CONTINUE +* +* ==== Update horizontal slab in H ==== +* + IF( WANTT ) THEN + DO 70 KCOL = KBOT + 1, N, NH + KLN = MIN( NH, N-KCOL+1 ) + CALL CGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, + $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) + CALL CLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), + $ LDH ) + 70 CONTINUE + END IF +* +* ==== Update vertical slab in Z ==== +* + IF( WANTZ ) THEN + DO 80 KROW = ILOZ, IHIZ, NV + KLN = MIN( NV, IHIZ-KROW+1 ) + CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + $ LDZ, V, LDV, ZERO, WV, LDWV ) + CALL CLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), + $ LDZ ) + 80 CONTINUE + END IF + END IF +* +* ==== Return the number of deflations ... ==== +* + ND = JW - NS +* +* ==== ... and the number of shifts. (Subtracting +* . INFQR from the spike length takes care +* . of the case of a rare QR failure while +* . calculating eigenvalues of the deflation +* . window.) ==== +* + NS = NS - INFQR +* +* ==== Return optimal workspace. 
==== +* + WORK( 1 ) = CMPLX( LWKOPT, 0 ) +* +* ==== End of CLAQR2 ==== +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/claqr3.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/claqr3.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,448 @@ + SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, + $ NV, WV, LDWV, WORK, LWORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, + $ LDZ, LWORK, N, ND, NH, NS, NV, NW + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ), + $ WORK( * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* ****************************************************************** +* Aggressive early deflation: +* +* This subroutine accepts as input an upper Hessenberg matrix +* H and performs an unitary similarity transformation +* designed to detect and deflate fully converged eigenvalues from +* a trailing principal submatrix. On output H has been over- +* written by a new Hessenberg matrix that is a perturbation of +* an unitary similarity transformation of H. It is to be +* hoped that the final version of H has many zero subdiagonal +* entries. +* +* ****************************************************************** +* WANTT (input) LOGICAL +* If .TRUE., then the Hessenberg matrix H is fully updated +* so that the triangular Schur factor may be +* computed (in cooperation with the calling subroutine). +* If .FALSE., then only enough of H is updated to preserve +* the eigenvalues. +* +* WANTZ (input) LOGICAL +* If .TRUE., then the unitary matrix Z is updated so +* so that the unitary Schur factor may be computed +* (in cooperation with the calling subroutine). +* If .FALSE., then Z is not referenced. 
+* +* N (input) INTEGER +* The order of the matrix H and (if WANTZ is .TRUE.) the +* order of the unitary matrix Z. +* +* KTOP (input) INTEGER +* It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. +* KBOT and KTOP together determine an isolated block +* along the diagonal of the Hessenberg matrix. +* +* KBOT (input) INTEGER +* It is assumed without a check that either +* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together +* determine an isolated block along the diagonal of the +* Hessenberg matrix. +* +* NW (input) INTEGER +* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). +* +* H (input/output) COMPLEX array, dimension (LDH,N) +* On input the initial N-by-N section of H stores the +* Hessenberg matrix undergoing aggressive early deflation. +* On output H has been transformed by a unitary +* similarity transformation, perturbed, and the returned +* to Hessenberg form that (it is to be hoped) has some +* zero subdiagonal entries. +* +* LDH (input) integer +* Leading dimension of H just as declared in the calling +* subroutine. N .LE. LDH +* +* ILOZ (input) INTEGER +* IHIZ (input) INTEGER +* Specify the rows of Z to which transformations must be +* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. +* +* Z (input/output) COMPLEX array, dimension (LDZ,IHI) +* IF WANTZ is .TRUE., then on output, the unitary +* similarity transformation mentioned above has been +* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. +* If WANTZ is .FALSE., then Z is unreferenced. +* +* LDZ (input) integer +* The leading dimension of Z just as declared in the +* calling subroutine. 1 .LE. LDZ. +* +* NS (output) integer +* The number of unconverged (ie approximate) eigenvalues +* returned in SR and SI that may be used as shifts by the +* calling subroutine. +* +* ND (output) integer +* The number of converged eigenvalues uncovered by this +* subroutine. 
+* +* SH (output) COMPLEX array, dimension KBOT +* On output, approximate eigenvalues that may +* be used for shifts are stored in SH(KBOT-ND-NS+1) +* through SR(KBOT-ND). Converged eigenvalues are +* stored in SH(KBOT-ND+1) through SH(KBOT). +* +* V (workspace) COMPLEX array, dimension (LDV,NW) +* An NW-by-NW work array. +* +* LDV (input) integer scalar +* The leading dimension of V just as declared in the +* calling subroutine. NW .LE. LDV +* +* NH (input) integer scalar +* The number of columns of T. NH.GE.NW. +* +* T (workspace) COMPLEX array, dimension (LDT,NW) +* +* LDT (input) integer +* The leading dimension of T just as declared in the +* calling subroutine. NW .LE. LDT +* +* NV (input) integer +* The number of rows of work array WV available for +* workspace. NV.GE.NW. +* +* WV (workspace) COMPLEX array, dimension (LDWV,NW) +* +* LDWV (input) integer +* The leading dimension of W just as declared in the +* calling subroutine. NW .LE. LDV +* +* WORK (workspace) COMPLEX array, dimension LWORK. +* On exit, WORK(1) is set to an estimate of the optimal value +* of LWORK for the given values of N, NW, KTOP and KBOT. +* +* LWORK (input) integer +* The dimension of the work array WORK. LWORK = 2*NW +* suffices, but greater efficiency may result from larger +* values of LWORK. +* +* If LWORK = -1, then a workspace query is assumed; CLAQR3 +* only estimates the optimal workspace size for the given +* values of N, NW, KTOP and KBOT. The estimate is returned +* in WORK(1). No error message related to LWORK is issued +* by XERBLA. Neither H nor Z are accessed. +* +* ================================================================ +* Based on contributions by +* Karen Braman and Ralph Byers, Department of Mathematics, +* University of Kansas, USA +* +* ================================================================== +* .. Parameters .. 
+ COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), + $ ONE = ( 1.0e0, 0.0e0 ) ) + REAL RZERO, RONE + PARAMETER ( RZERO = 0.0e0, RONE = 1.0e0 ) +* .. +* .. Local Scalars .. + COMPLEX BETA, CDUM, S, TAU + REAL FOO, SAFMAX, SAFMIN, SMLNUM, ULP + INTEGER I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN, + $ KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3, + $ LWKOPT, NMIN +* .. +* .. External Functions .. + REAL SLAMCH + INTEGER ILAENV + EXTERNAL SLAMCH, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEHRD, CGEMM, CLACPY, CLAHQR, CLAQR4, + $ CLARF, CLARFG, CLASET, CTREXC, CUNGHR, SLABAD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, INT, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* ==== Estimate optimal workspace. ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + IF( JW.LE.2 ) THEN + LWKOPT = 1 + ELSE +* +* ==== Workspace query call to CGEHRD ==== +* + CALL CGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK1 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to CUNGHR ==== +* + CALL CUNGHR( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK2 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to CLAQR4 ==== +* + CALL CLAQR4( .true., .true., JW, 1, JW, T, LDT, SH, 1, JW, V, + $ LDV, WORK, -1, INFQR ) + LWK3 = INT( WORK( 1 ) ) +* +* ==== Optimal workspace ==== +* + LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 ) + END IF +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = CMPLX( LWKOPT, 0 ) + RETURN + END IF +* +* ==== Nothing to do ... +* ... for an empty active block ... ==== + NS = 0 + ND = 0 + IF( KTOP.GT.KBOT ) + $ RETURN +* ... nor for an empty deflation window. 
==== + IF( NW.LT.1 ) + $ RETURN +* +* ==== Machine constants ==== +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = RONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( N ) / ULP ) +* +* ==== Setup deflation window ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + KWTOP = KBOT - JW + 1 + IF( KWTOP.EQ.KTOP ) THEN + S = ZERO + ELSE + S = H( KWTOP, KWTOP-1 ) + END IF +* + IF( KBOT.EQ.KWTOP ) THEN +* +* ==== 1-by-1 deflation window: not much to do ==== +* + SH( KWTOP ) = H( KWTOP, KWTOP ) + NS = 1 + ND = 0 + IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP, + $ KWTOP ) ) ) ) THEN + + NS = 0 + ND = 1 + IF( KWTOP.GT.KTOP ) + $ H( KWTOP, KWTOP-1 ) = ZERO + END IF + RETURN + END IF +* +* ==== Convert to spike-triangular form. (In case of a +* . rare QR failure, this routine continues to do +* . aggressive early deflation using that part of +* . the deflation window that converged using INFQR +* . here and there to keep track.) ==== +* + CALL CLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) + CALL CCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) +* + CALL CLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) + NMIN = ILAENV( 12, 'CLAQR3', 'SV', JW, 1, JW, LWORK ) + IF( JW.GT.NMIN ) THEN + CALL CLAQR4( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, + $ JW, V, LDV, WORK, LWORK, INFQR ) + ELSE + CALL CLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, + $ JW, V, LDV, INFQR ) + END IF +* +* ==== Deflation detection loop ==== +* + NS = JW + ILST = INFQR + 1 + DO 10 KNT = INFQR + 1, JW +* +* ==== Small spike tip deflation test ==== +* + FOO = CABS1( T( NS, NS ) ) + IF( FOO.EQ.RZERO ) + $ FOO = CABS1( S ) + IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) + $ THEN +* +* ==== One more converged eigenvalue ==== +* + NS = NS - 1 + ELSE +* +* ==== One undflatable eigenvalue. Move it up out of the +* . way. (CTREXC can not fail in this case.) 
==== +* + IFST = NS + CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + ILST = ILST + 1 + END IF + 10 CONTINUE +* +* ==== Return to Hessenberg form ==== +* + IF( NS.EQ.0 ) + $ S = ZERO +* + IF( NS.LT.JW ) THEN +* +* ==== sorting the diagonal of T improves accuracy for +* . graded matrices. ==== +* + DO 30 I = INFQR + 1, NS + IFST = I + DO 20 J = I + 1, NS + IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) ) + $ IFST = J + 20 CONTINUE + ILST = I + IF( IFST.NE.ILST ) + $ CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + 30 CONTINUE + END IF +* +* ==== Restore shift/eigenvalue array from T ==== +* + DO 40 I = INFQR + 1, JW + SH( KWTOP+I-1 ) = T( I, I ) + 40 CONTINUE +* +* + IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN + IF( NS.GT.1 .AND. S.NE.ZERO ) THEN +* +* ==== Reflect spike back into lower triangle ==== +* + CALL CCOPY( NS, V, LDV, WORK, 1 ) + DO 50 I = 1, NS + WORK( I ) = CONJG( WORK( I ) ) + 50 CONTINUE + BETA = WORK( 1 ) + CALL CLARFG( NS, BETA, WORK( 2 ), 1, TAU ) + WORK( 1 ) = ONE +* + CALL CLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) +* + CALL CLARF( 'L', NS, JW, WORK, 1, CONJG( TAU ), T, LDT, + $ WORK( JW+1 ) ) + CALL CLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL CLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) +* + CALL CGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + END IF +* +* ==== Copy updated reduced window into place ==== +* + IF( KWTOP.GT.1 ) + $ H( KWTOP, KWTOP-1 ) = S*CONJG( V( 1, 1 ) ) + CALL CLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) + CALL CCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), + $ LDH+1 ) +* +* ==== Accumulate orthogonal matrix in order update +* . H and Z, if requested. (A modified version +* . of CUNGHR that accumulates block Householder +* . transformations into V directly might be +* . marginally more efficient than the following.) ==== +* + IF( NS.GT.1 .AND. 
S.NE.ZERO ) THEN + CALL CUNGHR( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + CALL CGEMM( 'N', 'N', JW, NS, NS, ONE, V, LDV, T, LDT, ZERO, + $ WV, LDWV ) + CALL CLACPY( 'A', JW, NS, WV, LDWV, V, LDV ) + END IF +* +* ==== Update vertical slab in H ==== +* + IF( WANTT ) THEN + LTOP = 1 + ELSE + LTOP = KTOP + END IF + DO 60 KROW = LTOP, KWTOP - 1, NV + KLN = MIN( NV, KWTOP-KROW ) + CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), + $ LDH, V, LDV, ZERO, WV, LDWV ) + CALL CLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + 60 CONTINUE +* +* ==== Update horizontal slab in H ==== +* + IF( WANTT ) THEN + DO 70 KCOL = KBOT + 1, N, NH + KLN = MIN( NH, N-KCOL+1 ) + CALL CGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, + $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) + CALL CLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), + $ LDH ) + 70 CONTINUE + END IF +* +* ==== Update vertical slab in Z ==== +* + IF( WANTZ ) THEN + DO 80 KROW = ILOZ, IHIZ, NV + KLN = MIN( NV, IHIZ-KROW+1 ) + CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + $ LDZ, V, LDV, ZERO, WV, LDWV ) + CALL CLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), + $ LDZ ) + 80 CONTINUE + END IF + END IF +* +* ==== Return the number of deflations ... ==== +* + ND = JW - NS +* +* ==== ... and the number of shifts. (Subtracting +* . INFQR from the spike length takes care +* . of the case of a rare QR failure while +* . calculating eigenvalues of the deflation +* . window.) ==== +* + NS = NS - INFQR +* +* ==== Return optimal workspace. ==== +* + WORK( 1 ) = CMPLX( LWKOPT, 0 ) +* +* ==== End of CLAQR3 ==== +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/claqr4.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/claqr4.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,602 @@ + SUBROUTINE CLAQR4( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* This subroutine implements one level of recursion for CLAQR0. +* It is a complete implementation of the small bulge multi-shift +* QR algorithm. It may be called by CLAQR0 and, for large enough +* deflation window size, it may be called by CLAQR3. This +* subroutine is identical to CLAQR0 except that it calls CLAQR2 +* instead of CLAQR3. +* +* Purpose +* ======= +* +* CLAQR4 computes the eigenvalues of a Hessenberg matrix H +* and, optionally, the matrices T and Z from the Schur decomposition +* H = Z T Z**H, where T is an upper triangular matrix (the +* Schur form), and Z is the unitary matrix of Schur vectors. +* +* Optionally Z may be postmultiplied into an input unitary +* matrix Q so that this routine can give the Schur factorization +* of a matrix A which has been reduced to the Hessenberg form H +* by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. +* +* Arguments +* ========= +* +* WANTT (input) LOGICAL +* = .TRUE. : the full Schur form T is required; +* = .FALSE.: only eigenvalues are required. +* +* WANTZ (input) LOGICAL +* = .TRUE. : the matrix of Schur vectors Z is required; +* = .FALSE.: Schur vectors are not required. +* +* N (input) INTEGER +* The order of the matrix H. N .GE. 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, +* H(ILO,ILO-1) is zero. ILO and IHI are normally set by a +* previous call to CGEBAL, and then passed to CGEHRD when the +* matrix output by CGEBAL is reduced to Hessenberg form. +* Otherwise, ILO and IHI should be set to 1 and N, +* respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +* If N = 0, then ILO = 1 and IHI = 0. 
+* +* H (input/output) COMPLEX array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if INFO = 0 and WANTT is .TRUE., then H +* contains the upper triangular matrix T from the Schur +* decomposition (the Schur form). If INFO = 0 and WANT is +* .FALSE., then the contents of H are unspecified on exit. +* (The output value of H when INFO.GT.0 is given under the +* description of INFO below.) +* +* This subroutine may explicitly set H(i,j) = 0 for i.GT.j and +* j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH .GE. max(1,N). +* +* W (output) COMPLEX array, dimension (N) +* The computed eigenvalues of H(ILO:IHI,ILO:IHI) are stored +* in W(ILO:IHI). If WANTT is .TRUE., then the eigenvalues are +* stored in the same order as on the diagonal of the Schur +* form returned in H, with W(i) = H(i,i). +* +* Z (input/output) COMPLEX array, dimension (LDZ,IHI) +* If WANTZ is .FALSE., then Z is not referenced. +* If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is +* replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the +* orthogonal Schur factor of H(ILO:IHI,ILO:IHI). +* (The output value of Z when INFO.GT.0 is given under +* the description of INFO below.) +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. if WANTZ is .TRUE. +* then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. +* +* WORK (workspace/output) COMPLEX array, dimension LWORK +* On exit, if LWORK = -1, WORK(1) returns an estimate of +* the optimal value for LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK .GE. max(1,N) +* is sufficient, but LWORK typically as large as 6*N may +* be required for optimal performance. A workspace query +* to determine the optimal workspace size is recommended. +* +* If LWORK = -1, then CLAQR4 does a workspace query. +* In this case, CLAQR4 checks the input parameters and +* estimates the optimal workspace size for the given +* values of N, ILO and IHI. 
The estimate is returned +* in WORK(1). No error message related to LWORK is +* issued by XERBLA. Neither H nor Z are accessed. +* +* +* INFO (output) INTEGER +* = 0: successful exit +* .GT. 0: if INFO = i, CLAQR4 failed to compute all of +* the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +* and WI contain those eigenvalues which have been +* successfully computed. (Failures are rare.) +* +* If INFO .GT. 0 and WANT is .FALSE., then on exit, +* the remaining unconverged eigenvalues are the eigen- +* values of the upper Hessenberg matrix rows and +* columns ILO through INFO of the final, output +* value of H. +* +* If INFO .GT. 0 and WANTT is .TRUE., then on exit +* +* (*) (initial value of H)*U = U*(final value of H) +* +* where U is a unitary matrix. The final +* value of H is upper Hessenberg and triangular in +* rows and columns INFO+1 through IHI. +* +* If INFO .GT. 0 and WANTZ is .TRUE., then on exit +* +* (final value of Z(ILO:IHI,ILOZ:IHIZ) +* = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U +* +* where U is the unitary matrix in (*) (regard- +* less of the value of WANTT.) +* +* If INFO .GT. 0 and WANTZ is .FALSE., then Z is not +* accessed. +* +* ================================================================ +* Based on contributions by +* Karen Braman and Ralph Byers, Department of Mathematics, +* University of Kansas, USA +* +* ================================================================ +* References: +* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +* Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +* Performance, SIAM Journal of Matrix Analysis, volume 23, pages +* 929--947, 2002. +* +* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +* Algorithm Part II: Aggressive Early Deflation, SIAM Journal +* of Matrix Analysis, volume 23, pages 948--973, 2002. +* +* ================================================================ +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . 
CLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== +* +* ==== Exceptional deflation windows: try to cure rare +* . slow convergence by increasing the size of the +* . deflation window after KEXNW iterations. ===== +* +* ==== Exceptional shifts: try to cure rare slow convergence +* . with ad-hoc exceptional shifts every KEXSH iterations. +* . The constants WILK1 and WILK2 are used to form the +* . exceptional shifts. ==== +* + INTEGER NTINY + PARAMETER ( NTINY = 11 ) + INTEGER KEXNW, KEXSH + PARAMETER ( KEXNW = 5, KEXSH = 6 ) + REAL WILK1 + PARAMETER ( WILK1 = 0.75e0 ) + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), + $ ONE = ( 1.0e0, 0.0e0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0e0 ) +* .. +* .. Local Scalars .. + COMPLEX AA, BB, CC, CDUM, DD, DET, RTDISC, SWAP, TR2 + REAL S + INTEGER I, INF, IT, ITMAX, K, KACC22, KBOT, KDU, KS, + $ KT, KTOP, KU, KV, KWH, KWTOP, KWV, LD, LS, + $ LWKOPT, NDFL, NH, NHO, NIBBLE, NMIN, NS, NSMAX, + $ NSR, NVE, NW, NWMAX, NWR + LOGICAL NWINC, SORTED + CHARACTER JBCMPZ*2 +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Local Arrays .. + COMPLEX ZDUM( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CLACPY, CLAHQR, CLAQR2, CLAQR5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, INT, MAX, MIN, MOD, REAL, + $ SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. + INFO = 0 +* +* ==== Quick return for N = 0: nothing to do. ==== +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = ONE + RETURN + END IF +* +* ==== Set up job flags for ILAENV. ==== +* + IF( WANTT ) THEN + JBCMPZ( 1: 1 ) = 'S' + ELSE + JBCMPZ( 1: 1 ) = 'E' + END IF + IF( WANTZ ) THEN + JBCMPZ( 2: 2 ) = 'V' + ELSE + JBCMPZ( 2: 2 ) = 'N' + END IF +* +* ==== Tiny matrices must use CLAHQR. 
==== +* + IF( N.LE.NTINY ) THEN +* +* ==== Estimate optimal workspace. ==== +* + LWKOPT = 1 + IF( LWORK.NE.-1 ) + $ CALL CLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, INFO ) + ELSE +* +* ==== Use small bulge multi-shift QR with aggressive early +* . deflation on larger-than-tiny matrices. ==== +* +* ==== Hope for the best. ==== +* + INFO = 0 +* +* ==== NWR = recommended deflation window size. At this +* . point, N .GT. NTINY = 11, so there is enough +* . subdiagonal workspace for NWR.GE.2 as required. +* . (In fact, there is enough subdiagonal space for +* . NWR.GE.3.) ==== +* + NWR = ILAENV( 13, 'CLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NWR = MAX( 2, NWR ) + NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR ) + NW = NWR +* +* ==== NSR = recommended number of simultaneous shifts. +* . At this point N .GT. NTINY = 11, so there is at +* . enough subdiagonal workspace for NSR to be even +* . and greater than or equal to two as required. ==== +* + NSR = ILAENV( 15, 'CLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NSR = MIN( NSR, ( N+6 ) / 9, IHI-ILO ) + NSR = MAX( 2, NSR-MOD( NSR, 2 ) ) +* +* ==== Estimate optimal workspace ==== +* +* ==== Workspace query call to CLAQR2 ==== +* + CALL CLAQR2( WANTT, WANTZ, N, ILO, IHI, NWR+1, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, W, H, LDH, N, H, LDH, N, H, + $ LDH, WORK, -1 ) +* +* ==== Optimal workspace = MAX(CLAQR5, CLAQR2) ==== +* + LWKOPT = MAX( 3*NSR / 2, INT( WORK( 1 ) ) ) +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = CMPLX( LWKOPT, 0 ) + RETURN + END IF +* +* ==== CLAHQR/CLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'CLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== Nibble crossover point ==== +* + NIBBLE = ILAENV( 14, 'CLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NIBBLE = MAX( 0, NIBBLE ) +* +* ==== Accumulate reflections during ttswp? Use block +* . 2-by-2 structure during matrix-matrix multiply? 
==== +* + KACC22 = ILAENV( 16, 'CLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + KACC22 = MAX( 0, KACC22 ) + KACC22 = MIN( 2, KACC22 ) +* +* ==== NWMAX = the largest possible deflation window for +* . which there is sufficient workspace. ==== +* + NWMAX = MIN( ( N-1 ) / 3, LWORK / 2 ) +* +* ==== NSMAX = the Largest number of simultaneous shifts +* . for which there is sufficient workspace. ==== +* + NSMAX = MIN( ( N+6 ) / 9, 2*LWORK / 3 ) + NSMAX = NSMAX - MOD( NSMAX, 2 ) +* +* ==== NDFL: an iteration count restarted at deflation. ==== +* + NDFL = 1 +* +* ==== ITMAX = iteration limit ==== +* + ITMAX = MAX( 30, 2*KEXSH )*MAX( 10, ( IHI-ILO+1 ) ) +* +* ==== Last row and column in the active block ==== +* + KBOT = IHI +* +* ==== Main Loop ==== +* + DO 70 IT = 1, ITMAX +* +* ==== Done when KBOT falls below ILO ==== +* + IF( KBOT.LT.ILO ) + $ GO TO 80 +* +* ==== Locate active block ==== +* + DO 10 K = KBOT, ILO + 1, -1 + IF( H( K, K-1 ).EQ.ZERO ) + $ GO TO 20 + 10 CONTINUE + K = ILO + 20 CONTINUE + KTOP = K +* +* ==== Select deflation window size ==== +* + NH = KBOT - KTOP + 1 + IF( NDFL.LT.KEXNW .OR. NH.LT.NW ) THEN +* +* ==== Typical deflation window. If possible and +* . advisable, nibble the entire active block. +* . If not, use size NWR or NWR+1 depending upon +* . which has the smaller corresponding subdiagonal +* . entry (a heuristic). ==== +* + NWINC = .TRUE. + IF( NH.LE.MIN( NMIN, NWMAX ) ) THEN + NW = NH + ELSE + NW = MIN( NWR, NH, NWMAX ) + IF( NW.LT.NWMAX ) THEN + IF( NW.GE.NH-1 ) THEN + NW = NH + ELSE + KWTOP = KBOT - NW + 1 + IF( CABS1( H( KWTOP, KWTOP-1 ) ).GT. + $ CABS1( H( KWTOP-1, KWTOP-2 ) ) )NW = NW + 1 + END IF + END IF + END IF + ELSE +* +* ==== Exceptional deflation window. If there have +* . been no deflations in KEXNW or more iterations, +* . then vary the deflation window size. At first, +* . because, larger windows are, in general, more +* . powerful than smaller ones, rapidly increase the +* . window up to the maximum reasonable and possible. +* . 
Then maybe try a slightly smaller window. ==== +* + IF( NWINC .AND. NW.LT.MIN( NWMAX, NH ) ) THEN + NW = MIN( NWMAX, NH, 2*NW ) + ELSE + NWINC = .FALSE. + IF( NW.EQ.NH .AND. NH.GT.2 ) + $ NW = NH - 1 + END IF + END IF +* +* ==== Aggressive early deflation: +* . split workspace under the subdiagonal into +* . - an nw-by-nw work array V in the lower +* . left-hand-corner, +* . - an NW-by-at-least-NW-but-more-is-better +* . (NW-by-NHO) horizontal work array along +* . the bottom edge, +* . - an at-least-NW-but-more-is-better (NHV-by-NW) +* . vertical work array along the left-hand-edge. +* . ==== +* + KV = N - NW + 1 + KT = NW + 1 + NHO = ( N-NW-1 ) - KT + 1 + KWV = NW + 2 + NVE = ( N-NW ) - KWV + 1 +* +* ==== Aggressive early deflation ==== +* + CALL CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, W, H( KV, 1 ), LDH, NHO, + $ H( KV, KT ), LDH, NVE, H( KWV, 1 ), LDH, WORK, + $ LWORK ) +* +* ==== Adjust KBOT accounting for new deflations. ==== +* + KBOT = KBOT - LD +* +* ==== KS points to the shifts. ==== +* + KS = KBOT - LS + 1 +* +* ==== Skip an expensive QR sweep if there is a (partly +* . heuristic) reason to expect that many eigenvalues +* . will deflate without it. Here, the QR sweep is +* . skipped if many eigenvalues have just been deflated +* . or if the remaining active block is small. +* + IF( ( LD.EQ.0 ) .OR. ( ( 100*LD.LE.NW*NIBBLE ) .AND. ( KBOT- + $ KTOP+1.GT.MIN( NMIN, NWMAX ) ) ) ) THEN +* +* ==== NS = nominal number of simultaneous shifts. +* . This may be lowered (slightly) if CLAQR2 +* . did not provide that many shifts. ==== +* + NS = MIN( NSMAX, NSR, MAX( 2, KBOT-KTOP ) ) + NS = NS - MOD( NS, 2 ) +* +* ==== If there have been no deflations +* . in a multiple of KEXSH iterations, +* . then try exceptional shifts. +* . Otherwise use shifts provided by +* . CLAQR2 above or from the eigenvalues +* . of a trailing principal submatrix. 
==== +* + IF( MOD( NDFL, KEXSH ).EQ.0 ) THEN + KS = KBOT - NS + 1 + DO 30 I = KBOT, KS + 1, -2 + W( I ) = H( I, I ) + WILK1*CABS1( H( I, I-1 ) ) + W( I-1 ) = W( I ) + 30 CONTINUE + ELSE +* +* ==== Got NS/2 or fewer shifts? Use CLAHQR +* . on a trailing principal submatrix to +* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, +* . there is enough space below the subdiagonal +* . to fit an NS-by-NS scratch array.) ==== +* + IF( KBOT-KS+1.LE.NS / 2 ) THEN + KS = KBOT - NS + 1 + KT = N - NS + 1 + CALL CLACPY( 'A', NS, NS, H( KS, KS ), LDH, + $ H( KT, 1 ), LDH ) + CALL CLAHQR( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, W( KS ), 1, 1, ZDUM, + $ 1, INF ) + KS = KS + INF +* +* ==== In case of a rare QR failure use +* . eigenvalues of the trailing 2-by-2 +* . principal submatrix. Scale to avoid +* . overflows, underflows and subnormals. +* . (The scale factor S can not be zero, +* . because H(KBOT,KBOT-1) is nonzero.) ==== +* + IF( KS.GE.KBOT ) THEN + S = CABS1( H( KBOT-1, KBOT-1 ) ) + + $ CABS1( H( KBOT, KBOT-1 ) ) + + $ CABS1( H( KBOT-1, KBOT ) ) + + $ CABS1( H( KBOT, KBOT ) ) + AA = H( KBOT-1, KBOT-1 ) / S + CC = H( KBOT, KBOT-1 ) / S + BB = H( KBOT-1, KBOT ) / S + DD = H( KBOT, KBOT ) / S + TR2 = ( AA+DD ) / TWO + DET = ( AA-TR2 )*( DD-TR2 ) - BB*CC + RTDISC = SQRT( -DET ) + W( KBOT-1 ) = ( TR2+RTDISC )*S + W( KBOT ) = ( TR2-RTDISC )*S +* + KS = KBOT - 1 + END IF + END IF +* + IF( KBOT-KS+1.GT.NS ) THEN +* +* ==== Sort the shifts (Helps a little) ==== +* + SORTED = .false. + DO 50 K = KBOT, KS + 1, -1 + IF( SORTED ) + $ GO TO 60 + SORTED = .true. + DO 40 I = KS, K - 1 + IF( CABS1( W( I ) ).LT.CABS1( W( I+1 ) ) ) + $ THEN + SORTED = .false. + SWAP = W( I ) + W( I ) = W( I+1 ) + W( I+1 ) = SWAP + END IF + 40 CONTINUE + 50 CONTINUE + 60 CONTINUE + END IF + END IF +* +* ==== If there are only two shifts, then use +* . only one. ==== +* + IF( KBOT-KS+1.EQ.2 ) THEN + IF( CABS1( W( KBOT )-H( KBOT, KBOT ) ).LT. 
+ $ CABS1( W( KBOT-1 )-H( KBOT, KBOT ) ) ) THEN + W( KBOT-1 ) = W( KBOT ) + ELSE + W( KBOT ) = W( KBOT-1 ) + END IF + END IF +* +* ==== Use up to NS of the the smallest magnatiude +* . shifts. If there aren't NS shifts available, +* . then use them all, possibly dropping one to +* . make the number of shifts even. ==== +* + NS = MIN( NS, KBOT-KS+1 ) + NS = NS - MOD( NS, 2 ) + KS = KBOT - NS + 1 +* +* ==== Small-bulge multi-shift QR sweep: +* . split workspace under the subdiagonal into +* . - a KDU-by-KDU work array U in the lower +* . left-hand-corner, +* . - a KDU-by-at-least-KDU-but-more-is-better +* . (KDU-by-NHo) horizontal work array WH along +* . the bottom edge, +* . - and an at-least-KDU-but-more-is-better-by-KDU +* . (NVE-by-KDU) vertical work WV arrow along +* . the left-hand-edge. ==== +* + KDU = 3*NS - 3 + KU = N - KDU + 1 + KWH = KDU + 1 + NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1 + KWV = KDU + 4 + NVE = N - KDU - KWV + 1 +* +* ==== Small-bulge multi-shift QR sweep ==== +* + CALL CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NS, + $ W( KS ), H, LDH, ILOZ, IHIZ, Z, LDZ, WORK, + $ 3, H( KU, 1 ), LDH, NVE, H( KWV, 1 ), LDH, + $ NHO, H( KU, KWH ), LDH ) + END IF +* +* ==== Note progress (or the lack of it). ==== +* + IF( LD.GT.0 ) THEN + NDFL = 1 + ELSE + NDFL = NDFL + 1 + END IF +* +* ==== End of main loop ==== + 70 CONTINUE +* +* ==== Iteration limit exceeded. Set INFO to show where +* . the problem occurred and exit. ==== +* + INFO = KBOT + 80 CONTINUE + END IF +* +* ==== Return the optimal value of LWORK. 
==== +* + WORK( 1 ) = CMPLX( LWKOPT, 0 ) +* +* ==== End of CLAQR4 ==== +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/claqr5.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/claqr5.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,809 @@ + SUBROUTINE CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, + $ H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, + $ WV, LDWV, NH, WH, LDWH ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, + $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ), + $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* This auxiliary subroutine called by CLAQR0 performs a +* single small-bulge multi-shift QR sweep. +* +* WANTT (input) logical scalar +* WANTT = .true. if the triangular Schur factor +* is being computed. WANTT is set to .false. otherwise. +* +* WANTZ (input) logical scalar +* WANTZ = .true. if the unitary Schur factor is being +* computed. WANTZ is set to .false. otherwise. +* +* KACC22 (input) integer with value 0, 1, or 2. +* Specifies the computation mode of far-from-diagonal +* orthogonal updates. +* = 0: CLAQR5 does not accumulate reflections and does not +* use matrix-matrix multiply to update far-from-diagonal +* matrix entries. +* = 1: CLAQR5 accumulates reflections and uses matrix-matrix +* multiply to update the far-from-diagonal matrix entries. +* = 2: CLAQR5 accumulates reflections, uses matrix-matrix +* multiply to update the far-from-diagonal matrix entries, +* and takes advantage of 2-by-2 block structure during +* matrix multiplies. +* +* N (input) integer scalar +* N is the order of the Hessenberg matrix H upon which this +* subroutine operates. 
+* +* KTOP (input) integer scalar +* KBOT (input) integer scalar +* These are the first and last rows and columns of an +* isolated diagonal block upon which the QR sweep is to be +* applied. It is assumed without a check that +* either KTOP = 1 or H(KTOP,KTOP-1) = 0 +* and +* either KBOT = N or H(KBOT+1,KBOT) = 0. +* +* NSHFTS (input) integer scalar +* NSHFTS gives the number of simultaneous shifts. NSHFTS +* must be positive and even. +* +* S (input) COMPLEX array of size (NSHFTS) +* S contains the shifts of origin that define the multi- +* shift QR sweep. +* +* H (input/output) COMPLEX array of size (LDH,N) +* On input H contains a Hessenberg matrix. On output a +* multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied +* to the isolated diagonal block in rows and columns KTOP +* through KBOT. +* +* LDH (input) integer scalar +* LDH is the leading dimension of H just as declared in the +* calling procedure. LDH.GE.MAX(1,N). +* +* ILOZ (input) INTEGER +* IHIZ (input) INTEGER +* Specify the rows of Z to which transformations must be +* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N +* +* Z (input/output) COMPLEX array of size (LDZ,IHI) +* If WANTZ = .TRUE., then the QR Sweep unitary +* similarity transformation is accumulated into +* Z(ILOZ:IHIZ,ILO:IHI) from the right. +* If WANTZ = .FALSE., then Z is unreferenced. +* +* LDZ (input) integer scalar +* LDA is the leading dimension of Z just as declared in +* the calling procedure. LDZ.GE.N. +* +* V (workspace) COMPLEX array of size (LDV,NSHFTS/2) +* +* LDV (input) integer scalar +* LDV is the leading dimension of V as declared in the +* calling procedure. LDV.GE.3. +* +* U (workspace) COMPLEX array of size +* (LDU,3*NSHFTS-3) +* +* LDU (input) integer scalar +* LDU is the leading dimension of U just as declared in the +* in the calling subroutine. LDU.GE.3*NSHFTS-3. +* +* NH (input) integer scalar +* NH is the number of columns in array WH available for +* workspace. NH.GE.1. 
+* +* WH (workspace) COMPLEX array of size (LDWH,NH) +* +* LDWH (input) integer scalar +* Leading dimension of WH just as declared in the +* calling procedure. LDWH.GE.3*NSHFTS-3. +* +* NV (input) integer scalar +* NV is the number of rows in WV agailable for workspace. +* NV.GE.1. +* +* WV (workspace) COMPLEX array of size +* (LDWV,3*NSHFTS-3) +* +* LDWV (input) integer scalar +* LDWV is the leading dimension of WV as declared in the +* in the calling subroutine. LDWV.GE.NV. +* +* +* ================================================================ +* Based on contributions by +* Karen Braman and Ralph Byers, Department of Mathematics, +* University of Kansas, USA +* +* ============================================================ +* Reference: +* +* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +* Algorithm Part I: Maintaining Well Focused Shifts, and +* Level 3 Performance, SIAM Journal of Matrix Analysis, +* volume 23, pages 929--947, 2002. +* +* ============================================================ +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), + $ ONE = ( 1.0e0, 0.0e0 ) ) + REAL RZERO, RONE + PARAMETER ( RZERO = 0.0e0, RONE = 1.0e0 ) +* .. +* .. Local Scalars .. + COMPLEX ALPHA, BETA, CDUM, REFSUM + REAL H11, H12, H21, H22, SAFMAX, SAFMIN, SCL, + $ SMLNUM, TST1, TST2, ULP + INTEGER I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN, + $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS, + $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL, + $ NS, NU + LOGICAL ACCUM, BLK22, BMP22 +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. +* + INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, MOD, REAL +* .. +* .. Local Arrays .. + COMPLEX VT( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CLACPY, CLAQR1, CLARFG, CLASET, CTRMM, + $ SLABAD +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. 
+ CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* ==== If there are no shifts, then there is nothing to do. ==== +* + IF( NSHFTS.LT.2 ) + $ RETURN +* +* ==== If the active block is empty or 1-by-1, then there +* . is nothing to do. ==== +* + IF( KTOP.GE.KBOT ) + $ RETURN +* +* ==== NSHFTS is supposed to be even, but if is odd, +* . then simply reduce it by one. ==== +* + NS = NSHFTS - MOD( NSHFTS, 2 ) +* +* ==== Machine constants for deflation ==== +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = RONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( N ) / ULP ) +* +* ==== Use accumulated reflections to update far-from-diagonal +* . entries ? ==== +* + ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 ) +* +* ==== If so, exploit the 2-by-2 block structure? ==== +* + BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 ) +* +* ==== clear trash ==== +* + IF( KTOP+2.LE.KBOT ) + $ H( KTOP+2, KTOP ) = ZERO +* +* ==== NBMPS = number of 2-shift bulges in the chain ==== +* + NBMPS = NS / 2 +* +* ==== KDU = width of slab ==== +* + KDU = 6*NBMPS - 3 +* +* ==== Create and chase chains of NBMPS bulges ==== +* + DO 210 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2 + NDCOL = INCOL + KDU + IF( ACCUM ) + $ CALL CLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU ) +* +* ==== Near-the-diagonal bulge chase. The following loop +* . performs the near-the-diagonal part of a small bulge +* . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal +* . chunk extends from column INCOL to column NDCOL +* . (including both column INCOL and column NDCOL). The +* . following loop chases a 3*NBMPS column long chain of +* . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL +* . may be less than KTOP and and NDCOL may be greater than +* . KBOT indicating phantom columns from which to chase +* . bulges before they are actually introduced or to which +* . to chase bulges beyond column KBOT.) 
==== +* + DO 140 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 ) +* +* ==== Bulges number MTOP to MBOT are active double implicit +* . shift bulges. There may or may not also be small +* . 2-by-2 bulge, if there is room. The inactive bulges +* . (if any) must wait until the active bulges have moved +* . down the diagonal to make room. The phantom matrix +* . paradigm described above helps keep track. ==== +* + MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 ) + MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 ) + M22 = MBOT + 1 + BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ. + $ ( KBOT-2 ) +* +* ==== Generate reflections to chase the chain right +* . one column. (The minimum value of K is KTOP-1.) ==== +* + DO 10 M = MTOP, MBOT + K = KRCOL + 3*( M-1 ) + IF( K.EQ.KTOP-1 ) THEN + CALL CLAQR1( 3, H( KTOP, KTOP ), LDH, S( 2*M-1 ), + $ S( 2*M ), V( 1, M ) ) + ALPHA = V( 1, M ) + CALL CLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) ) + ELSE + BETA = H( K+1, K ) + V( 2, M ) = H( K+2, K ) + V( 3, M ) = H( K+3, K ) + CALL CLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) ) +* +* ==== A Bulge may collapse because of vigilant +* . deflation or destructive underflow. (The +* . initial bulge is always collapsed.) Use +* . the two-small-subdiagonals trick to try +* . to get it started again. If V(2,M).NE.0 and +* . V(3,M) = H(K+3,K+1) = H(K+3,K+2) = 0, then +* . this bulge is collapsing into a zero +* . subdiagonal. It will be restarted next +* . trip through the loop.) +* + IF( V( 1, M ).NE.ZERO .AND. + $ ( V( 3, M ).NE.ZERO .OR. ( H( K+3, + $ K+1 ).EQ.ZERO .AND. H( K+3, K+2 ).EQ.ZERO ) ) ) + $ THEN +* +* ==== Typical case: not collapsed (yet). ==== +* + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + ELSE +* +* ==== Atypical case: collapsed. Attempt to +* . reintroduce ignoring H(K+1,K). If the +* . fill resulting from the new reflector +* . is too large, then abandon it. +* . Otherwise, use the new one. 
==== +* + CALL CLAQR1( 3, H( K+1, K+1 ), LDH, S( 2*M-1 ), + $ S( 2*M ), VT ) + SCL = CABS1( VT( 1 ) ) + CABS1( VT( 2 ) ) + + $ CABS1( VT( 3 ) ) + IF( SCL.NE.RZERO ) THEN + VT( 1 ) = VT( 1 ) / SCL + VT( 2 ) = VT( 2 ) / SCL + VT( 3 ) = VT( 3 ) / SCL + END IF +* +* ==== The following is the traditional and +* . conservative two-small-subdiagonals +* . test. ==== +* . + IF( CABS1( H( K+1, K ) )* + $ ( CABS1( VT( 2 ) )+CABS1( VT( 3 ) ) ).GT.ULP* + $ CABS1( VT( 1 ) )*( CABS1( H( K, + $ K ) )+CABS1( H( K+1, K+1 ) )+CABS1( H( K+2, + $ K+2 ) ) ) ) THEN +* +* ==== Starting a new bulge here would +* . create non-negligible fill. If +* . the old reflector is diagonal (only +* . possible with underflows), then +* . change it to I. Otherwise, use +* . it with trepidation. ==== +* + IF( V( 2, M ).EQ.ZERO .AND. V( 3, M ).EQ.ZERO ) + $ THEN + V( 1, M ) = ZERO + ELSE + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + END IF + ELSE +* +* ==== Stating a new bulge here would +* . create only negligible fill. +* . Replace the old reflector with +* . the new one. ==== +* + ALPHA = VT( 1 ) + CALL CLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) + REFSUM = H( K+1, K ) + + $ H( K+2, K )*CONJG( VT( 2 ) ) + + $ H( K+3, K )*CONJG( VT( 3 ) ) + H( K+1, K ) = H( K+1, K ) - + $ CONJG( VT( 1 ) )*REFSUM + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + V( 1, M ) = VT( 1 ) + V( 2, M ) = VT( 2 ) + V( 3, M ) = VT( 3 ) + END IF + END IF + END IF + 10 CONTINUE +* +* ==== Generate a 2-by-2 reflection, if needed. ==== +* + K = KRCOL + 3*( M22-1 ) + IF( BMP22 ) THEN + IF( K.EQ.KTOP-1 ) THEN + CALL CLAQR1( 2, H( K+1, K+1 ), LDH, S( 2*M22-1 ), + $ S( 2*M22 ), V( 1, M22 ) ) + BETA = V( 1, M22 ) + CALL CLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) + ELSE + BETA = H( K+1, K ) + V( 2, M22 ) = H( K+2, K ) + CALL CLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + END IF + ELSE +* +* ==== Initialize V(1,M22) here to avoid possible undefined +* . variable problems later. 
==== +* + V( 1, M22 ) = ZERO + END IF +* +* ==== Multiply H by reflections from the left ==== +* + IF( ACCUM ) THEN + JBOT = MIN( NDCOL, KBOT ) + ELSE IF( WANTT ) THEN + JBOT = N + ELSE + JBOT = KBOT + END IF + DO 30 J = MAX( KTOP, KRCOL ), JBOT + MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 ) + DO 20 M = MTOP, MEND + K = KRCOL + 3*( M-1 ) + REFSUM = CONJG( V( 1, M ) )* + $ ( H( K+1, J )+CONJG( V( 2, M ) )*H( K+2, J )+ + $ CONJG( V( 3, M ) )*H( K+3, J ) ) + H( K+1, J ) = H( K+1, J ) - REFSUM + H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) + H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M ) + 20 CONTINUE + 30 CONTINUE + IF( BMP22 ) THEN + K = KRCOL + 3*( M22-1 ) + DO 40 J = MAX( K+1, KTOP ), JBOT + REFSUM = CONJG( V( 1, M22 ) )* + $ ( H( K+1, J )+CONJG( V( 2, M22 ) )* + $ H( K+2, J ) ) + H( K+1, J ) = H( K+1, J ) - REFSUM + H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 ) + 40 CONTINUE + END IF +* +* ==== Multiply H by reflections from the right. +* . Delay filling in the last row until the +* . vigilant deflation check is complete. ==== +* + IF( ACCUM ) THEN + JTOP = MAX( KTOP, INCOL ) + ELSE IF( WANTT ) THEN + JTOP = 1 + ELSE + JTOP = KTOP + END IF + DO 80 M = MTOP, MBOT + IF( V( 1, M ).NE.ZERO ) THEN + K = KRCOL + 3*( M-1 ) + DO 50 J = JTOP, MIN( KBOT, K+3 ) + REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* + $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) + H( J, K+1 ) = H( J, K+1 ) - REFSUM + H( J, K+2 ) = H( J, K+2 ) - + $ REFSUM*CONJG( V( 2, M ) ) + H( J, K+3 ) = H( J, K+3 ) - + $ REFSUM*CONJG( V( 3, M ) ) + 50 CONTINUE +* + IF( ACCUM ) THEN +* +* ==== Accumulate U. (If necessary, update Z later +* . with with an efficient matrix-matrix +* . multiply.) 
==== +* + KMS = K - INCOL + DO 60 J = MAX( 1, KTOP-INCOL ), KDU + REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* + $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) + U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM + U( J, KMS+2 ) = U( J, KMS+2 ) - + $ REFSUM*CONJG( V( 2, M ) ) + U( J, KMS+3 ) = U( J, KMS+3 ) - + $ REFSUM*CONJG( V( 3, M ) ) + 60 CONTINUE + ELSE IF( WANTZ ) THEN +* +* ==== U is not accumulated, so update Z +* . now by multiplying by reflections +* . from the right. ==== +* + DO 70 J = ILOZ, IHIZ + REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* + $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) + Z( J, K+1 ) = Z( J, K+1 ) - REFSUM + Z( J, K+2 ) = Z( J, K+2 ) - + $ REFSUM*CONJG( V( 2, M ) ) + Z( J, K+3 ) = Z( J, K+3 ) - + $ REFSUM*CONJG( V( 3, M ) ) + 70 CONTINUE + END IF + END IF + 80 CONTINUE +* +* ==== Special case: 2-by-2 reflection (if needed) ==== +* + K = KRCOL + 3*( M22-1 ) + IF( BMP22 .AND. ( V( 1, M22 ).NE.ZERO ) ) THEN + DO 90 J = JTOP, MIN( KBOT, K+3 ) + REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* + $ H( J, K+2 ) ) + H( J, K+1 ) = H( J, K+1 ) - REFSUM + H( J, K+2 ) = H( J, K+2 ) - + $ REFSUM*CONJG( V( 2, M22 ) ) + 90 CONTINUE +* + IF( ACCUM ) THEN + KMS = K - INCOL + DO 100 J = MAX( 1, KTOP-INCOL ), KDU + REFSUM = V( 1, M22 )*( U( J, KMS+1 )+V( 2, M22 )* + $ U( J, KMS+2 ) ) + U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM + U( J, KMS+2 ) = U( J, KMS+2 ) - + $ REFSUM*CONJG( V( 2, M22 ) ) + 100 CONTINUE + ELSE IF( WANTZ ) THEN + DO 110 J = ILOZ, IHIZ + REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )* + $ Z( J, K+2 ) ) + Z( J, K+1 ) = Z( J, K+1 ) - REFSUM + Z( J, K+2 ) = Z( J, K+2 ) - + $ REFSUM*CONJG( V( 2, M22 ) ) + 110 CONTINUE + END IF + END IF +* +* ==== Vigilant deflation check ==== +* + MSTART = MTOP + IF( KRCOL+3*( MSTART-1 ).LT.KTOP ) + $ MSTART = MSTART + 1 + MEND = MBOT + IF( BMP22 ) + $ MEND = MEND + 1 + IF( KRCOL.EQ.KBOT-2 ) + $ MEND = MEND + 1 + DO 120 M = MSTART, MEND + K = MIN( KBOT-1, KRCOL+3*( M-1 ) ) +* +* ==== The following convergence test requires that +* . 
the traditional small-compared-to-nearby-diagonals
==== +* + IF( ACCUM ) THEN + IF( WANTT ) THEN + JTOP = 1 + JBOT = N + ELSE + JTOP = KTOP + JBOT = KBOT + END IF + IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR. + $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN +* +* ==== Updates not exploiting the 2-by-2 block +* . structure of U. K1 and NU keep track of +* . the location and size of U in the special +* . cases of introducing bulges and chasing +* . bulges off the bottom. In these special +* . cases and in case the number of shifts +* . is NS = 2, there is no 2-by-2 block +* . structure to exploit. ==== +* + K1 = MAX( 1, KTOP-INCOL ) + NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1 +* +* ==== Horizontal Multiply ==== +* + DO 150 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH + JLEN = MIN( NH, JBOT-JCOL+1 ) + CALL CGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ), + $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH, + $ LDWH ) + CALL CLACPY( 'ALL', NU, JLEN, WH, LDWH, + $ H( INCOL+K1, JCOL ), LDH ) + 150 CONTINUE +* +* ==== Vertical multiply ==== +* + DO 160 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV + JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW ) + CALL CGEMM( 'N', 'N', JLEN, NU, NU, ONE, + $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ), + $ LDU, ZERO, WV, LDWV ) + CALL CLACPY( 'ALL', JLEN, NU, WV, LDWV, + $ H( JROW, INCOL+K1 ), LDH ) + 160 CONTINUE +* +* ==== Z multiply (also vertical) ==== +* + IF( WANTZ ) THEN + DO 170 JROW = ILOZ, IHIZ, NV + JLEN = MIN( NV, IHIZ-JROW+1 ) + CALL CGEMM( 'N', 'N', JLEN, NU, NU, ONE, + $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ), + $ LDU, ZERO, WV, LDWV ) + CALL CLACPY( 'ALL', JLEN, NU, WV, LDWV, + $ Z( JROW, INCOL+K1 ), LDZ ) + 170 CONTINUE + END IF + ELSE +* +* ==== Updates exploiting U's 2-by-2 block structure. +* . (I2, I4, J2, J4 are the last rows and columns +* . of the blocks.) ==== +* + I2 = ( KDU+1 ) / 2 + I4 = KDU + J2 = I4 - I2 + J4 = KDU +* +* ==== KZS and KNZ deal with the band of zeros +* . along the diagonal of one of the triangular +* . blocks. 
==== +* + KZS = ( J4-J2 ) - ( NS+1 ) + KNZ = NS + 1 +* +* ==== Horizontal multiply ==== +* + DO 180 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH + JLEN = MIN( NH, JBOT-JCOL+1 ) +* +* ==== Copy bottom of H to top+KZS of scratch ==== +* (The first KZS rows get multiplied by zero.) ==== +* + CALL CLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ), + $ LDH, WH( KZS+1, 1 ), LDWH ) +* +* ==== Multiply by U21' ==== +* + CALL CLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH ) + CALL CTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE, + $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ), + $ LDWH ) +* +* ==== Multiply top of H by U11' ==== +* + CALL CGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU, + $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH ) +* +* ==== Copy top of H bottom of WH ==== +* + CALL CLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH, + $ WH( I2+1, 1 ), LDWH ) +* +* ==== Multiply by U21' ==== +* + CALL CTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE, + $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH ) +* +* ==== Multiply by U22 ==== +* + CALL CGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE, + $ U( J2+1, I2+1 ), LDU, + $ H( INCOL+1+J2, JCOL ), LDH, ONE, + $ WH( I2+1, 1 ), LDWH ) +* +* ==== Copy it back ==== +* + CALL CLACPY( 'ALL', KDU, JLEN, WH, LDWH, + $ H( INCOL+1, JCOL ), LDH ) + 180 CONTINUE +* +* ==== Vertical multiply ==== +* + DO 190 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV + JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW ) +* +* ==== Copy right of H to scratch (the first KZS +* . 
columns get multiplied by zero) ==== +* + CALL CLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ), + $ LDH, WV( 1, 1+KZS ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL CLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV ) + CALL CTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, + $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), + $ LDWV ) +* +* ==== Multiply by U11 ==== +* + CALL CGEMM( 'N', 'N', JLEN, I2, J2, ONE, + $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV, + $ LDWV ) +* +* ==== Copy left of H to right of scratch ==== +* + CALL CLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH, + $ WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL CTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, + $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U22 ==== +* + CALL CGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, + $ H( JROW, INCOL+1+J2 ), LDH, + $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ), + $ LDWV ) +* +* ==== Copy it back ==== +* + CALL CLACPY( 'ALL', JLEN, KDU, WV, LDWV, + $ H( JROW, INCOL+1 ), LDH ) + 190 CONTINUE +* +* ==== Multiply Z (also vertical) ==== +* + IF( WANTZ ) THEN + DO 200 JROW = ILOZ, IHIZ, NV + JLEN = MIN( NV, IHIZ-JROW+1 ) +* +* ==== Copy right of Z to left of scratch (first +* . 
KZS columns get multiplied by zero) ==== +* + CALL CLACPY( 'ALL', JLEN, KNZ, + $ Z( JROW, INCOL+1+J2 ), LDZ, + $ WV( 1, 1+KZS ), LDWV ) +* +* ==== Multiply by U12 ==== +* + CALL CLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, + $ LDWV ) + CALL CTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, + $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), + $ LDWV ) +* +* ==== Multiply by U11 ==== +* + CALL CGEMM( 'N', 'N', JLEN, I2, J2, ONE, + $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE, + $ WV, LDWV ) +* +* ==== Copy left of Z to right of scratch ==== +* + CALL CLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ), + $ LDZ, WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL CTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, + $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), + $ LDWV ) +* +* ==== Multiply by U22 ==== +* + CALL CGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, + $ Z( JROW, INCOL+1+J2 ), LDZ, + $ U( J2+1, I2+1 ), LDU, ONE, + $ WV( 1, 1+I2 ), LDWV ) +* +* ==== Copy the result back to Z ==== +* + CALL CLACPY( 'ALL', JLEN, KDU, WV, LDWV, + $ Z( JROW, INCOL+1 ), LDZ ) + 200 CONTINUE + END IF + END IF + END IF + 210 CONTINUE +* +* ==== End of CLAQR5 ==== +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clarf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clarf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,120 @@ + SUBROUTINE CLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CLARF applies a complex elementary reflector H to a complex M-by-N +* matrix C, from either the left or the right. H is represented in the +* form +* +* H = I - tau * v * v' +* +* where tau is a complex scalar and v is a complex vector. +* +* If tau = 0, then H is taken to be the unit matrix. 
+* +* To apply H' (the conjugate transpose of H), supply conjg(tau) instead +* tau. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) COMPLEX array, dimension +* (1 + (M-1)*abs(INCV)) if SIDE = 'L' +* or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +* The vector v in the representation of H. V is not used if +* TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0. +* +* TAU (input) COMPLEX +* The value tau in the representation of H. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERC +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. 
+* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w := C' * v +* + CALL CGEMV( 'Conjugate transpose', M, N, ONE, C, LDC, V, + $ INCV, ZERO, WORK, 1 ) +* +* C := C - v * w' +* + CALL CGERC( M, N, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w := C * v +* + CALL CGEMV( 'No transpose', M, N, ONE, C, LDC, V, INCV, + $ ZERO, WORK, 1 ) +* +* C := C - w * v' +* + CALL CGERC( M, N, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of CLARF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clarfb.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clarfb.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,608 @@ + SUBROUTINE CLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* CLARFB applies a complex block reflector H or its transpose H' to a +* complex M-by-N matrix C, from either the left or the right. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply H or H' from the Left +* = 'R': apply H or H' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply H (No transpose) +* = 'C': apply H' (Conjugate transpose) +* +* DIRECT (input) CHARACTER*1 +* Indicates how H is formed from a product of elementary +* reflectors +* = 'F': H = H(1) H(2) . . . H(k) (Forward) +* = 'B': H = H(k) . . . 
H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Indicates how the vectors which define the elementary +* reflectors are stored: +* = 'C': Columnwise +* = 'R': Rowwise +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* K (input) INTEGER +* The order of the matrix T (= the number of elementary +* reflectors whose product defines the block reflector). +* +* V (input) COMPLEX array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,M) if STOREV = 'R' and SIDE = 'L' +* (LDV,N) if STOREV = 'R' and SIDE = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +* if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +* if STOREV = 'R', LDV >= K. +* +* T (input) COMPLEX array, dimension (LDT,K) +* The triangular K-by-K matrix T in the representation of the +* block reflector. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension (LDWORK,K) +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* If SIDE = 'L', LDWORK >= max(1,N); +* if SIDE = 'R', LDWORK >= max(1,M). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMM, CLACGV, CTRMM +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. 
N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) +* +* W := C1' +* + DO 10 J = 1, K + CALL CCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL CLACGV( N, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2'*V2 +* + CALL CGEMM( 'Conjugate transpose', 'No transpose', N, + $ K, M-K, ONE, C( K+1, 1 ), LDC, + $ V( K+1, 1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL CTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W' +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2 * W' +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ M-K, N, K, -ONE, V( K+1, 1 ), LDV, WORK, + $ LDWORK, ONE, C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1' +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W' +* + DO 30 J = 1, K + DO 20 I = 1, N + C( J, I ) = C( J, I ) - CONJG( WORK( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL CCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL CGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + 
CALL CTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V' +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2' +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M, + $ N-K, K, -ONE, WORK, LDWORK, V( K+1, 1 ), + $ LDV, ONE, C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1' +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) +* +* W := C2' +* + DO 70 J = 1, K + CALL CCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL CLACGV( N, WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1'*V1 +* + CALL CGEMM( 'Conjugate transpose', 'No transpose', N, + $ K, M-K, ONE, C, LDC, V, LDV, ONE, WORK, + $ LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL CTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W' +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1 * W' +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ M-K, N, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, C, LDC ) + END IF +* +* W := W * V2' +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V( M-K+1, 1 ), LDV, WORK, + $ LDWORK ) +* +* C2 := C2 - W' +* + DO 90 J = 1, K + DO 80 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - + $ CONJG( WORK( I, J ) ) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 
1, K + CALL CCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL CGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL CTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V' +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1' +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M, + $ N-K, K, -ONE, WORK, LDWORK, V, LDV, ONE, + $ C, LDC ) + END IF +* +* W := W * V2' +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V( N-K+1, 1 ), LDV, WORK, + $ LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. 
+* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) +* +* W := C1' +* + DO 130 J = 1, K + CALL CCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL CLACGV( N, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1' +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2'*V2' +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', N, K, M-K, ONE, + $ C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE, + $ WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL CTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V' * W' +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2' * W' +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', M-K, N, K, -ONE, + $ V( 1, K+1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W' +* + DO 150 J = 1, K + DO 140 I = 1, N + C( J, I ) = C( J, I ) - CONJG( WORK( I, J ) ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL CCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1' +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2' +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M, + $ K, N-K, ONE, C( 1, K+1 ), LDC, + $ V( 1, K+1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL CTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL CGEMM( 'No transpose', 'No transpose', M, 
N-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) +* +* W := C2' +* + DO 190 J = 1, K + CALL CCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL CLACGV( N, WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2' +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V( 1, M-K+1 ), LDV, WORK, + $ LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1'*V1' +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', N, K, M-K, ONE, C, + $ LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL CTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V' * W' +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1' * W' +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', M-K, N, K, -ONE, V, + $ LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W' +* + DO 210 J = 1, K + DO 200 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - + $ CONJG( WORK( I, J ) ) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL CCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2' +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V( 1, N-K+1 ), 
LDV, WORK, + $ LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1' +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M, + $ K, N-K, ONE, C, LDC, V, LDV, ONE, WORK, + $ LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL CTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL CGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 240 J = 1, K + DO 230 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of CLARFB +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clarfg.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clarfg.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,145 @@ + SUBROUTINE CLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INCX, N + COMPLEX ALPHA, TAU +* .. +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* Purpose +* ======= +* +* CLARFG generates a complex elementary reflector H of order n, such +* that +* +* H' * ( alpha ) = ( beta ), H' * H = I. +* ( x ) ( 0 ) +* +* where alpha and beta are scalars, with beta real, and x is an +* (n-1)-element complex vector. H is represented in the form +* +* H = I - tau * ( 1 ) * ( 1 v' ) , +* ( v ) +* +* where tau is a complex scalar and v is a complex (n-1)-element +* vector. Note that H is not hermitian. +* +* If the elements of x are all zero and alpha is real, then tau = 0 +* and H is taken to be the unit matrix. +* +* Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . 
+* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the elementary reflector. +* +* ALPHA (input/output) COMPLEX +* On entry, the value alpha. +* On exit, it is overwritten with the value beta. +* +* X (input/output) COMPLEX array, dimension +* (1+(N-2)*abs(INCX)) +* On entry, the vector x. +* On exit, it is overwritten with the vector v. +* +* INCX (input) INTEGER +* The increment between elements of X. INCX > 0. +* +* TAU (output) COMPLEX +* The value tau. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + REAL ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + REAL SCNRM2, SLAMCH, SLAPY3 + COMPLEX CLADIV + EXTERNAL SCNRM2, SLAMCH, SLAPY3, CLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, REAL, SIGN +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CSSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = SCNRM2( N-1, X, INCX ) + ALPHR = REAL( ALPHA ) + ALPHI = AIMAG( ALPHA ) +* + IF( XNORM.EQ.ZERO .AND. 
ALPHI.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + SAFMIN = SLAMCH( 'S' ) / SLAMCH( 'E' ) + RSAFMN = ONE / SAFMIN +* + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + KNT = 0 + 10 CONTINUE + KNT = KNT + 1 + CALL CSSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHI = ALPHI*RSAFMN + ALPHR = ALPHR*RSAFMN + IF( ABS( BETA ).LT.SAFMIN ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = SCNRM2( N-1, X, INCX ) + ALPHA = CMPLX( ALPHR, ALPHI ) + BETA = -SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + TAU = CMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA ) + ALPHA = CLADIV( CMPLX( ONE ), ALPHA-BETA ) + CALL CSCAL( N-1, ALPHA, X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + ALPHA = BETA + DO 20 J = 1, KNT + ALPHA = ALPHA*SAFMIN + 20 CONTINUE + ELSE + TAU = CMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA ) + ALPHA = CLADIV( CMPLX( ONE ), ALPHA-BETA ) + CALL CSCAL( N-1, ALPHA, X, INCX ) + ALPHA = BETA + END IF + END IF +* + RETURN +* +* End of CLARFG +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clarft.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clarft.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,224 @@ + SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* CLARFT forms the triangular factor T of a complex block reflector H +* of order n, which is defined as a product of k elementary reflectors. +* +* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +* +* If DIRECT = 'B', H = H(k) . . . 
H(2) H(1) and T is lower triangular. +* +* If STOREV = 'C', the vector which defines the elementary reflector +* H(i) is stored in the i-th column of the array V, and +* +* H = I - V * T * V' +* +* If STOREV = 'R', the vector which defines the elementary reflector +* H(i) is stored in the i-th row of the array V, and +* +* H = I - V' * T * V +* +* Arguments +* ========= +* +* DIRECT (input) CHARACTER*1 +* Specifies the order in which the elementary reflectors are +* multiplied to form the block reflector: +* = 'F': H = H(1) H(2) . . . H(k) (Forward) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Specifies how the vectors which define the elementary +* reflectors are stored (see also Further Details): +* = 'C': columnwise +* = 'R': rowwise +* +* N (input) INTEGER +* The order of the block reflector H. N >= 0. +* +* K (input) INTEGER +* The order of the triangular factor T (= the number of +* elementary reflectors). K >= 1. +* +* V (input/output) COMPLEX array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,N) if STOREV = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i). +* +* T (output) COMPLEX array, dimension (LDT,K) +* The k by k triangular factor T of the block reflector. +* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +* lower triangular. The rest of the array is not used. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* Further Details +* =============== +* +* The shape of the matrix V and the storage of the vectors which define +* the H(i) is best illustrated by the following example with n = 5 and +* k = 3. The elements equal to 1 are not stored; the corresponding +* array elements are modified but restored on exit. 
The rest of the +* array is not used. +* +* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +* +* V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +* ( v1 1 ) ( 1 v2 v2 v2 ) +* ( v1 v2 1 ) ( 1 v3 v3 ) +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* +* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +* +* V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +* ( v1 v2 v3 ) ( v2 v2 v2 1 ) +* ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +* ( 1 v3 ) +* ( 1 ) +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J + COMPLEX VII +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 I = 1, K + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = 1, I + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + VII = V( I, I ) + V( I, I ) = ONE + IF( LSAME( STOREV, 'C' ) ) THEN +* +* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) +* + CALL CGEMV( 'Conjugate transpose', N-I+1, I-1, + $ -TAU( I ), V( I, 1 ), LDV, V( I, I ), 1, + $ ZERO, T( 1, I ), 1 ) + ELSE +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' +* + IF( I.LT.N ) + $ CALL CLACGV( N-I, V( I, I+1 ), LDV ) + CALL CGEMV( 'No transpose', I-1, N-I+1, -TAU( I ), + $ V( 1, I ), LDV, V( I, I ), LDV, ZERO, + $ T( 1, I ), 1 ) + IF( I.LT.N ) + $ CALL CLACGV( N-I, V( I, I+1 ), LDV ) + END IF + V( I, I ) = VII +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + ELSE + DO 40 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 30 J = I, K + T( J, I ) = ZERO + 30 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K 
) THEN + IF( LSAME( STOREV, 'C' ) ) THEN + VII = V( N-K+I, I ) + V( N-K+I, I ) = ONE +* +* T(i+1:k,i) := +* - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) +* + CALL CGEMV( 'Conjugate transpose', N-K+I, K-I, + $ -TAU( I ), V( 1, I+1 ), LDV, V( 1, I ), + $ 1, ZERO, T( I+1, I ), 1 ) + V( N-K+I, I ) = VII + ELSE + VII = V( I, N-K+I ) + V( I, N-K+I ) = ONE +* +* T(i+1:k,i) := +* - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' +* + CALL CLACGV( N-K+I-1, V( I, 1 ), LDV ) + CALL CGEMV( 'No transpose', K-I, N-K+I, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) + CALL CLACGV( N-K+I-1, V( I, 1 ), LDV ) + V( I, N-K+I ) = VII + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 40 CONTINUE + END IF + RETURN +* +* End of CLARFT +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clarfx.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clarfx.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,640 @@ + SUBROUTINE CLARFX( SIDE, M, N, V, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER LDC, M, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CLARFX applies a complex elementary reflector H to a complex m by n +* matrix C, from either the left or the right. H is represented in the +* form +* +* H = I - tau * v * v' +* +* where tau is a complex scalar and v is a complex vector. +* +* If tau = 0, then H is taken to be the unit matrix +* +* This version uses inline code if H has order < 11. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. 
+* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) COMPLEX array, dimension (M) if SIDE = 'L' +* or (N) if SIDE = 'R' +* The vector v in the representation of H. +* +* TAU (input) COMPLEX +* The value tau in the representation of H. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDA >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* WORK is not referenced if H has order < 11. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER J + COMPLEX SUM, T1, T10, T2, T3, T4, T5, T6, T7, T8, T9, + $ V1, V10, V2, V3, V4, V5, V6, V7, V8, V9 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERC +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C, where H has order m. 
+* + GO TO ( 10, 30, 50, 70, 90, 110, 130, 150, + $ 170, 190 )M +* +* Code for general M +* +* w := C'*v +* + CALL CGEMV( 'Conjugate transpose', M, N, ONE, C, LDC, V, 1, + $ ZERO, WORK, 1 ) +* +* C := C - tau * v * w' +* + CALL CGERC( M, N, -TAU, V, 1, WORK, 1, C, LDC ) + GO TO 410 + 10 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*CONJG( V( 1 ) ) + DO 20 J = 1, N + C( 1, J ) = T1*C( 1, J ) + 20 CONTINUE + GO TO 410 + 30 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + DO 40 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + 40 CONTINUE + GO TO 410 + 50 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + DO 60 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + 60 CONTINUE + GO TO 410 + 70 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + DO 80 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + 80 CONTINUE + GO TO 410 + 90 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + DO 100 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + 
V5*C( 5, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + 100 CONTINUE + GO TO 410 + 110 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + DO 120 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + 120 CONTINUE + GO TO 410 + 130 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + V7 = CONJG( V( 7 ) ) + T7 = TAU*CONJG( V7 ) + DO 140 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + 140 CONTINUE + GO TO 410 + 150 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + V7 
= CONJG( V( 7 ) ) + T7 = TAU*CONJG( V7 ) + V8 = CONJG( V( 8 ) ) + T8 = TAU*CONJG( V8 ) + DO 160 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + 160 CONTINUE + GO TO 410 + 170 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + V7 = CONJG( V( 7 ) ) + T7 = TAU*CONJG( V7 ) + V8 = CONJG( V( 8 ) ) + T8 = TAU*CONJG( V8 ) + V9 = CONJG( V( 9 ) ) + T9 = TAU*CONJG( V9 ) + DO 180 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + 180 CONTINUE + GO TO 410 + 190 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + V7 = CONJG( V( 7 ) ) + T7 = TAU*CONJG( V7 ) + V8 = CONJG( V( 8 ) ) + T8 = TAU*CONJG( V8 ) + V9 = CONJG( V( 9 ) ) + T9 = TAU*CONJG( V9 ) + V10 = CONJG( V( 10 ) ) + T10 
= TAU*CONJG( V10 ) + DO 200 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + + $ V10*C( 10, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + C( 10, J ) = C( 10, J ) - SUM*T10 + 200 CONTINUE + GO TO 410 + ELSE +* +* Form C * H, where H has order n. +* + GO TO ( 210, 230, 250, 270, 290, 310, 330, 350, + $ 370, 390 )N +* +* Code for general N +* +* w := C * v +* + CALL CGEMV( 'No transpose', M, N, ONE, C, LDC, V, 1, ZERO, + $ WORK, 1 ) +* +* C := C - tau * w * v' +* + CALL CGERC( M, N, -TAU, WORK, 1, V, 1, C, LDC ) + GO TO 410 + 210 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*CONJG( V( 1 ) ) + DO 220 J = 1, M + C( J, 1 ) = T1*C( J, 1 ) + 220 CONTINUE + GO TO 410 + 230 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + DO 240 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + 240 CONTINUE + GO TO 410 + 250 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + DO 260 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + 260 CONTINUE + GO TO 410 + 270 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + DO 280 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( 
J, 4 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + 280 CONTINUE + GO TO 410 + 290 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + DO 300 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + 300 CONTINUE + GO TO 410 + 310 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*CONJG( V6 ) + DO 320 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + 320 CONTINUE + GO TO 410 + 330 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*CONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*CONJG( V7 ) + DO 340 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( 
J, 7 ) - SUM*T7 + 340 CONTINUE + GO TO 410 + 350 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*CONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*CONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*CONJG( V8 ) + DO 360 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + 360 CONTINUE + GO TO 410 + 370 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*CONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*CONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*CONJG( V8 ) + V9 = V( 9 ) + T9 = TAU*CONJG( V9 ) + DO 380 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + 380 CONTINUE + GO TO 410 + 390 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = 
TAU*CONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*CONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*CONJG( V8 ) + V9 = V( 9 ) + T9 = TAU*CONJG( V9 ) + V10 = V( 10 ) + T10 = TAU*CONJG( V10 ) + DO 400 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + + $ V10*C( J, 10 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + C( J, 10 ) = C( J, 10 ) - SUM*T10 + 400 CONTINUE + GO TO 410 + END IF + 410 RETURN +* +* End of CLARFX +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clartg.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clartg.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,195 @@ + SUBROUTINE CLARTG( F, G, CS, SN, R ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + REAL CS + COMPLEX F, G, R, SN +* .. +* +* Purpose +* ======= +* +* CLARTG generates a plane rotation so that +* +* [ CS SN ] [ F ] [ R ] +* [ __ ] . [ ] = [ ] where CS**2 + |SN|**2 = 1. +* [ -SN CS ] [ G ] [ 0 ] +* +* This is a faster version of the BLAS1 routine CROTG, except for +* the following differences: +* F and G are unchanged on return. +* If G=0, then CS=1 and SN=0. +* If F=0, then CS=0 and SN is chosen so that R is real. +* +* Arguments +* ========= +* +* F (input) COMPLEX +* The first component of vector to be rotated. +* +* G (input) COMPLEX +* The second component of vector to be rotated. +* +* CS (output) REAL +* The cosine of the rotation. +* +* SN (output) COMPLEX +* The sine of the rotation. +* +* R (output) COMPLEX +* The nonzero component of the rotated vector. 
+* +* Further Details +* ======= ======= +* +* 3-5-96 - Modified with a new algorithm by W. Kahan and J. Demmel +* +* This version has a few statements commented out for thread safety +* (machine parameters are computed on each entry). 10 feb 03, SJH. +* +* ===================================================================== +* +* .. Parameters .. + REAL TWO, ONE, ZERO + PARAMETER ( TWO = 2.0E+0, ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. +* LOGICAL FIRST + INTEGER COUNT, I + REAL D, DI, DR, EPS, F2, F2S, G2, G2S, SAFMIN, + $ SAFMN2, SAFMX2, SCALE + COMPLEX FF, FS, GS +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, INT, LOG, MAX, REAL, + $ SQRT +* .. +* .. Statement Functions .. + REAL ABS1, ABSSQ +* .. +* .. Save statement .. +* SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. +* DATA FIRST / .TRUE. / +* .. +* .. Statement Function definitions .. + ABS1( FF ) = MAX( ABS( REAL( FF ) ), ABS( AIMAG( FF ) ) ) + ABSSQ( FF ) = REAL( FF )**2 + AIMAG( FF )**2 +* .. +* .. Executable Statements .. +* +* IF( FIRST ) THEN + SAFMIN = SLAMCH( 'S' ) + EPS = SLAMCH( 'E' ) + SAFMN2 = SLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( SLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 +* FIRST = .FALSE. 
+* END IF + SCALE = MAX( ABS1( F ), ABS1( G ) ) + FS = F + GS = G + COUNT = 0 + IF( SCALE.GE.SAFMX2 ) THEN + 10 CONTINUE + COUNT = COUNT + 1 + FS = FS*SAFMN2 + GS = GS*SAFMN2 + SCALE = SCALE*SAFMN2 + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + ELSE IF( SCALE.LE.SAFMN2 ) THEN + IF( G.EQ.CZERO ) THEN + CS = ONE + SN = CZERO + R = F + RETURN + END IF + 20 CONTINUE + COUNT = COUNT - 1 + FS = FS*SAFMX2 + GS = GS*SAFMX2 + SCALE = SCALE*SAFMX2 + IF( SCALE.LE.SAFMN2 ) + $ GO TO 20 + END IF + F2 = ABSSQ( FS ) + G2 = ABSSQ( GS ) + IF( F2.LE.MAX( G2, ONE )*SAFMIN ) THEN +* +* This is a rare case: F is very small. +* + IF( F.EQ.CZERO ) THEN + CS = ZERO + R = SLAPY2( REAL( G ), AIMAG( G ) ) +* Do complex/real division explicitly with two real divisions + D = SLAPY2( REAL( GS ), AIMAG( GS ) ) + SN = CMPLX( REAL( GS ) / D, -AIMAG( GS ) / D ) + RETURN + END IF + F2S = SLAPY2( REAL( FS ), AIMAG( FS ) ) +* G2 and G2S are accurate +* G2 is at least SAFMIN, and G2S is at least SAFMN2 + G2S = SQRT( G2 ) +* Error in CS from underflow in F2S is at most +* UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS +* If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN, +* and so CS .lt. sqrt(SAFMIN) +* If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN +* and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS) +* Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S + CS = F2S / G2S +* Make sure abs(FF) = 1 +* Do complex/real division explicitly with 2 real divisions + IF( ABS1( F ).GT.ONE ) THEN + D = SLAPY2( REAL( F ), AIMAG( F ) ) + FF = CMPLX( REAL( F ) / D, AIMAG( F ) / D ) + ELSE + DR = SAFMX2*REAL( F ) + DI = SAFMX2*AIMAG( F ) + D = SLAPY2( DR, DI ) + FF = CMPLX( DR / D, DI / D ) + END IF + SN = FF*CMPLX( REAL( GS ) / G2S, -AIMAG( GS ) / G2S ) + R = CS*F + SN*G + ELSE +* +* This is the most common case. 
+* Neither F2 nor F2/G2 are less than SAFMIN +* F2S cannot overflow, and it is accurate +* + F2S = SQRT( ONE+G2 / F2 ) +* Do the F2S(real)*FS(complex) multiply with two real multiplies + R = CMPLX( F2S*REAL( FS ), F2S*AIMAG( FS ) ) + CS = ONE / F2S + D = F2 + G2 +* Do complex/real division explicitly with two real divisions + SN = CMPLX( REAL( R ) / D, AIMAG( R ) / D ) + SN = SN*CONJG( GS ) + IF( COUNT.NE.0 ) THEN + IF( COUNT.GT.0 ) THEN + DO 30 I = 1, COUNT + R = R*SAFMX2 + 30 CONTINUE + ELSE + DO 40 I = 1, -COUNT + R = R*SAFMN2 + 40 CONTINUE + END IF + END IF + END IF + RETURN +* +* End of CLARTG +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clarz.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clarz.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,157 @@ + SUBROUTINE CLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, L, LDC, M, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CLARZ applies a complex elementary reflector H to a complex +* M-by-N matrix C, from either the left or the right. H is represented +* in the form +* +* H = I - tau * v * v' +* +* where tau is a complex scalar and v is a complex vector. +* +* If tau = 0, then H is taken to be the unit matrix. +* +* To apply H' (the conjugate transpose of H), supply conjg(tau) instead +* tau. +* +* H is a product of k elementary reflectors as returned by CTZRZF. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* L (input) INTEGER +* The number of entries of the vector V containing +* the meaningful part of the Householder vectors. 
+* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* V (input) COMPLEX array, dimension (1+(L-1)*abs(INCV)) +* The vector v in the representation of H as returned by +* CTZRZF. V is not used if TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0. +* +* TAU (input) COMPLEX +* The value tau in the representation of H. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMV, CGERC, CGERU, CLACGV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:n ) = conjg( C( 1, 1:n ) ) +* + CALL CCOPY( N, C, LDC, WORK, 1 ) + CALL CLACGV( N, WORK, 1 ) +* +* w( 1:n ) = conjg( w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) ) +* + CALL CGEMV( 'Conjugate transpose', L, N, ONE, C( M-L+1, 1 ), + $ LDC, V, INCV, ONE, WORK, 1 ) + CALL CLACGV( N, WORK, 1 ) +* +* C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) +* + CALL CAXPY( N, -TAU, WORK, 1, C, LDC ) +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... 
+* tau * v( 1:l ) * conjg( w( 1:n )' ) +* + CALL CGERU( L, N, -TAU, V, INCV, WORK, 1, C( M-L+1, 1 ), + $ LDC ) + END IF +* + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:m ) = C( 1:m, 1 ) +* + CALL CCOPY( M, C, 1, WORK, 1 ) +* +* w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) +* + CALL CGEMV( 'No transpose', M, L, ONE, C( 1, N-L+1 ), LDC, + $ V, INCV, ONE, WORK, 1 ) +* +* C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) +* + CALL CAXPY( M, -TAU, WORK, 1, C, 1 ) +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* tau * w( 1:m ) * v( 1:l )' +* + CALL CGERC( M, L, -TAU, WORK, 1, V, INCV, C( 1, N-L+1 ), + $ LDC ) +* + END IF +* + END IF +* + RETURN +* +* End of CLARZ +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clarzb.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clarzb.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,234 @@ + SUBROUTINE CLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, + $ LDV, T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, L, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* CLARZB applies a complex block reflector H or its transpose H**H +* to a complex distributed M-by-N C from the left or the right. +* +* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply H or H' from the Left +* = 'R': apply H or H' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply H (No transpose) +* = 'C': apply H' (Conjugate transpose) +* +* DIRECT (input) CHARACTER*1 +* Indicates how H is formed from a product of elementary +* reflectors +* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +* = 'B': H = H(k) . 
. . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Indicates how the vectors which define the elementary +* reflectors are stored: +* = 'C': Columnwise (not supported yet) +* = 'R': Rowwise +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* K (input) INTEGER +* The order of the matrix T (= the number of elementary +* reflectors whose product defines the block reflector). +* +* L (input) INTEGER +* The number of columns of the matrix V containing the +* meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* V (input) COMPLEX array, dimension (LDV,NV). +* If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. +* +* T (input) COMPLEX array, dimension (LDT,K) +* The triangular K-by-K matrix T in the representation of the +* block reflector. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension (LDWORK,K) +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* If SIDE = 'L', LDWORK >= max(1,N); +* if SIDE = 'R', LDWORK >= max(1,M). +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, INFO, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. 
External Subroutines .. + EXTERNAL CCOPY, CGEMM, CLACGV, CTRMM, XERBLA +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLARZB', -INFO ) + RETURN + END IF +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C +* +* W( 1:n, 1:k ) = conjg( C( 1:k, 1:n )' ) +* + DO 10 J = 1, K + CALL CCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... +* conjg( C( m-l+1:m, 1:n )' ) * V( 1:k, 1:l )' +* + IF( L.GT.0 ) + $ CALL CGEMM( 'Transpose', 'Conjugate transpose', N, K, L, + $ ONE, C( M-L+1, 1 ), LDC, V, LDV, ONE, WORK, + $ LDWORK ) +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) * T' or W( 1:m, 1:k ) * T +* + CALL CTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:k, 1:n ) = C( 1:k, 1:n ) - conjg( W( 1:n, 1:k )' ) +* + DO 30 J = 1, N + DO 20 I = 1, K + C( I, J ) = C( I, J ) - WORK( J, I ) + 20 CONTINUE + 30 CONTINUE +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* conjg( V( 1:k, 1:l )' ) * conjg( W( 1:n, 1:k )' ) +* + IF( L.GT.0 ) + $ CALL CGEMM( 'Transpose', 'Transpose', L, N, K, -ONE, V, LDV, + $ WORK, LDWORK, ONE, C( M-L+1, 1 ), LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' +* +* W( 1:m, 1:k ) = C( 1:m, 1:k ) +* + DO 40 J = 1, K + CALL CCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... 
+* C( 1:m, n-l+1:n ) * conjg( V( 1:k, 1:l )' ) +* + IF( L.GT.0 ) + $ CALL CGEMM( 'No transpose', 'Transpose', M, K, L, ONE, + $ C( 1, N-L+1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) * conjg( T ) or +* W( 1:m, 1:k ) * conjg( T' ) +* + DO 50 J = 1, K + CALL CLACGV( K-J+1, T( J, J ), 1 ) + 50 CONTINUE + CALL CTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, ONE, T, + $ LDT, WORK, LDWORK ) + DO 60 J = 1, K + CALL CLACGV( K-J+1, T( J, J ), 1 ) + 60 CONTINUE +* +* C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) +* + DO 80 J = 1, K + DO 70 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 70 CONTINUE + 80 CONTINUE +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* W( 1:m, 1:k ) * conjg( V( 1:k, 1:l ) ) +* + DO 90 J = 1, L + CALL CLACGV( K, V( 1, J ), 1 ) + 90 CONTINUE + IF( L.GT.0 ) + $ CALL CGEMM( 'No transpose', 'No transpose', M, L, K, -ONE, + $ WORK, LDWORK, V, LDV, ONE, C( 1, N-L+1 ), LDC ) + DO 100 J = 1, L + CALL CLACGV( K, V( 1, J ), 1 ) + 100 CONTINUE +* + END IF +* + RETURN +* +* End of CLARZB +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clarzt.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clarzt.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,186 @@ + SUBROUTINE CLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* CLARZT forms the triangular factor T of a complex block reflector +* H of order > n, which is defined as a product of k elementary +* reflectors. +* +* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +* +* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. 
+* +* If STOREV = 'C', the vector which defines the elementary reflector +* H(i) is stored in the i-th column of the array V, and +* +* H = I - V * T * V' +* +* If STOREV = 'R', the vector which defines the elementary reflector +* H(i) is stored in the i-th row of the array V, and +* +* H = I - V' * T * V +* +* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +* +* Arguments +* ========= +* +* DIRECT (input) CHARACTER*1 +* Specifies the order in which the elementary reflectors are +* multiplied to form the block reflector: +* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Specifies how the vectors which define the elementary +* reflectors are stored (see also Further Details): +* = 'C': columnwise (not supported yet) +* = 'R': rowwise +* +* N (input) INTEGER +* The order of the block reflector H. N >= 0. +* +* K (input) INTEGER +* The order of the triangular factor T (= the number of +* elementary reflectors). K >= 1. +* +* V (input/output) COMPLEX array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,N) if STOREV = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i). +* +* T (output) COMPLEX array, dimension (LDT,K) +* The k by k triangular factor T of the block reflector. +* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +* lower triangular. The rest of the array is not used. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. 
of Tenn., Knoxville, USA +* +* The shape of the matrix V and the storage of the vectors which define +* the H(i) is best illustrated by the following example with n = 5 and +* k = 3. The elements equal to 1 are not stored; the corresponding +* array elements are modified but restored on exit. The rest of the +* array is not used. +* +* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +* +* ______V_____ +* ( v1 v2 v3 ) / \ +* ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) +* V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) +* ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) +* ( v1 v2 v3 ) +* . . . +* . . . +* 1 . . +* 1 . +* 1 +* +* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +* +* ______V_____ +* 1 / \ +* . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) +* . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) +* . . . ( . . 1 . . v3 v3 v3 v3 v3 ) +* . . . +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* V = ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CTRMV, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. 
+* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLARZT', -INFO ) + RETURN + END IF +* + DO 20 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = I, K + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN +* +* T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)' +* + CALL CLACGV( N, V( I, 1 ), LDV ) + CALL CGEMV( 'No transpose', K-I, N, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) + CALL CLACGV( N, V( I, 1 ), LDV ) +* +* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + RETURN +* +* End of CLARZT +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clascl.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clascl.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,267 @@ + SUBROUTINE CLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER TYPE + INTEGER INFO, KL, KU, LDA, M, N + REAL CFROM, CTO +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLASCL multiplies the M by N complex matrix A by the real scalar +* CTO/CFROM. This is done without over/underflow as long as the final +* result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that +* A may be full, upper triangular, lower triangular, upper Hessenberg, +* or banded. +* +* Arguments +* ========= +* +* TYPE (input) CHARACTER*1 +* TYPE indices the storage type of the input matrix. +* = 'G': A is a full matrix. +* = 'L': A is a lower triangular matrix. 
+* = 'U': A is an upper triangular matrix. +* = 'H': A is an upper Hessenberg matrix. +* = 'B': A is a symmetric band matrix with lower bandwidth KL +* and upper bandwidth KU and with the only the lower +* half stored. +* = 'Q': A is a symmetric band matrix with lower bandwidth KL +* and upper bandwidth KU and with the only the upper +* half stored. +* = 'Z': A is a band matrix with lower bandwidth KL and upper +* bandwidth KU. +* +* KL (input) INTEGER +* The lower bandwidth of A. Referenced only if TYPE = 'B', +* 'Q' or 'Z'. +* +* KU (input) INTEGER +* The upper bandwidth of A. Referenced only if TYPE = 'B', +* 'Q' or 'Z'. +* +* CFROM (input) REAL +* CTO (input) REAL +* The matrix A is multiplied by CTO/CFROM. A(I,J) is computed +* without over/underflow if the final result CTO*A(I,J)/CFROM +* can be represented without over/underflow. CFROM must be +* nonzero. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* The matrix to be multiplied by CTO/CFROM. See TYPE for the +* storage type. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* INFO (output) INTEGER +* 0 - successful exit +* <0 - if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER I, ITYPE, J, K1, K2, K3, K4 + REAL BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 +* + IF( LSAME( TYPE, 'G' ) ) THEN + ITYPE = 0 + ELSE IF( LSAME( TYPE, 'L' ) ) THEN + ITYPE = 1 + ELSE IF( LSAME( TYPE, 'U' ) ) THEN + ITYPE = 2 + ELSE IF( LSAME( TYPE, 'H' ) ) THEN + ITYPE = 3 + ELSE IF( LSAME( TYPE, 'B' ) ) THEN + ITYPE = 4 + ELSE IF( LSAME( TYPE, 'Q' ) ) THEN + ITYPE = 5 + ELSE IF( LSAME( TYPE, 'Z' ) ) THEN + ITYPE = 6 + ELSE + ITYPE = -1 + END IF +* + IF( ITYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( CFROM.EQ.ZERO ) THEN + INFO = -4 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR. + $ ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN + INFO = -7 + ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN + INFO = -9 + ELSE IF( ITYPE.GE.4 ) THEN + IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN + INFO = -2 + ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR. + $ ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) ) + $ THEN + INFO = -3 + ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR. + $ ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR. + $ ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN + INFO = -9 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLASCL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* + CFROMC = CFROM + CTOC = CTO +* + 10 CONTINUE + CFROM1 = CFROMC*SMLNUM + CTO1 = CTOC / BIGNUM + IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN + MUL = SMLNUM + DONE = .FALSE. + CFROMC = CFROM1 + ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN + MUL = BIGNUM + DONE = .FALSE. + CTOC = CTO1 + ELSE + MUL = CTOC / CFROMC + DONE = .TRUE. 
+ END IF +* + IF( ITYPE.EQ.0 ) THEN +* +* Full matrix +* + DO 30 J = 1, N + DO 20 I = 1, M + A( I, J ) = A( I, J )*MUL + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( ITYPE.EQ.1 ) THEN +* +* Lower triangular matrix +* + DO 50 J = 1, N + DO 40 I = J, M + A( I, J ) = A( I, J )*MUL + 40 CONTINUE + 50 CONTINUE +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Upper triangular matrix +* + DO 70 J = 1, N + DO 60 I = 1, MIN( J, M ) + A( I, J ) = A( I, J )*MUL + 60 CONTINUE + 70 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* Upper Hessenberg matrix +* + DO 90 J = 1, N + DO 80 I = 1, MIN( J+1, M ) + A( I, J ) = A( I, J )*MUL + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Lower half of a symmetric band matrix +* + K3 = KL + 1 + K4 = N + 1 + DO 110 J = 1, N + DO 100 I = 1, MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 100 CONTINUE + 110 CONTINUE +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Upper half of a symmetric band matrix +* + K1 = KU + 2 + K3 = KU + 1 + DO 130 J = 1, N + DO 120 I = MAX( K1-J, 1 ), K3 + A( I, J ) = A( I, J )*MUL + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( ITYPE.EQ.6 ) THEN +* +* Band matrix +* + K1 = KL + KU + 2 + K2 = KL + 1 + K3 = 2*KL + KU + 1 + K4 = KL + KU + 1 + M + DO 150 J = 1, N + DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 140 CONTINUE + 150 CONTINUE +* + END IF +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of CLASCL +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/claset.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/claset.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,114 @@ + SUBROUTINE CLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, M, N + COMPLEX ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. 
+* +* Purpose +* ======= +* +* CLASET initializes a 2-D array A to BETA on the diagonal and +* ALPHA on the offdiagonals. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the part of the matrix A to be set. +* = 'U': Upper triangular part is set. The lower triangle +* is unchanged. +* = 'L': Lower triangular part is set. The upper triangle +* is unchanged. +* Otherwise: All of the matrix A is set. +* +* M (input) INTEGER +* On entry, M specifies the number of rows of A. +* +* N (input) INTEGER +* On entry, N specifies the number of columns of A. +* +* ALPHA (input) COMPLEX +* All the offdiagonal array elements are set to ALPHA. +* +* BETA (input) COMPLEX +* All the diagonal array elements are set to BETA. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; +* A(i,i) = BETA , 1 <= i <= min(m,n) +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Set the diagonal to BETA and the strictly upper triangular +* part of the array to ALPHA. +* + DO 20 J = 2, N + DO 10 I = 1, MIN( J-1, M ) + A( I, J ) = ALPHA + 10 CONTINUE + 20 CONTINUE + DO 30 I = 1, MIN( N, M ) + A( I, I ) = BETA + 30 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN +* +* Set the diagonal to BETA and the strictly lower triangular +* part of the array to ALPHA. +* + DO 50 J = 1, MIN( M, N ) + DO 40 I = J + 1, M + A( I, J ) = ALPHA + 40 CONTINUE + 50 CONTINUE + DO 60 I = 1, MIN( N, M ) + A( I, I ) = BETA + 60 CONTINUE +* + ELSE +* +* Set the array to BETA on the diagonal and ALPHA on the +* offdiagonal. 
+* + DO 80 J = 1, N + DO 70 I = 1, M + A( I, J ) = ALPHA + 70 CONTINUE + 80 CONTINUE + DO 90 I = 1, MIN( M, N ) + A( I, I ) = BETA + 90 CONTINUE + END IF +* + RETURN +* +* End of CLASET +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clasr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clasr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,363 @@ + SUBROUTINE CLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, PIVOT, SIDE + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL C( * ), S( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLASR applies a sequence of real plane rotations to a complex matrix +* A, from either the left or the right. +* +* When SIDE = 'L', the transformation takes the form +* +* A := P*A +* +* and when SIDE = 'R', the transformation takes the form +* +* A := A*P**T +* +* where P is an orthogonal matrix consisting of a sequence of z plane +* rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', +* and P**T is the transpose of P. +* +* When DIRECT = 'F' (Forward sequence), then +* +* P = P(z-1) * ... * P(2) * P(1) +* +* and when DIRECT = 'B' (Backward sequence), then +* +* P = P(1) * P(2) * ... * P(z-1) +* +* where P(k) is a plane rotation matrix defined by the 2-by-2 rotation +* +* R(k) = ( c(k) s(k) ) +* = ( -s(k) c(k) ). +* +* When PIVOT = 'V' (Variable pivot), the rotation is performed +* for the plane (k,k+1), i.e., P(k) has the form +* +* P(k) = ( 1 ) +* ( ... ) +* ( 1 ) +* ( c(k) s(k) ) +* ( -s(k) c(k) ) +* ( 1 ) +* ( ... ) +* ( 1 ) +* +* where R(k) appears as a rank-2 modification to the identity matrix in +* rows and columns k and k+1. +* +* When PIVOT = 'T' (Top pivot), the rotation is performed for the +* plane (1,k+1), so P(k) has the form +* +* P(k) = ( c(k) s(k) ) +* ( 1 ) +* ( ... 
) +* ( 1 ) +* ( -s(k) c(k) ) +* ( 1 ) +* ( ... ) +* ( 1 ) +* +* where R(k) appears in rows and columns 1 and k+1. +* +* Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is +* performed for the plane (k,z), giving P(k) the form +* +* P(k) = ( 1 ) +* ( ... ) +* ( 1 ) +* ( c(k) s(k) ) +* ( 1 ) +* ( ... ) +* ( 1 ) +* ( -s(k) c(k) ) +* +* where R(k) appears in rows and columns k and z. The rotations are +* performed without ever forming P(k) explicitly. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* Specifies whether the plane rotation matrix P is applied to +* A on the left or the right. +* = 'L': Left, compute A := P*A +* = 'R': Right, compute A:= A*P**T +* +* PIVOT (input) CHARACTER*1 +* Specifies the plane for which P(k) is a plane rotation +* matrix. +* = 'V': Variable pivot, the plane (k,k+1) +* = 'T': Top pivot, the plane (1,k+1) +* = 'B': Bottom pivot, the plane (k,z) +* +* DIRECT (input) CHARACTER*1 +* Specifies whether P is a forward or backward sequence of +* plane rotations. +* = 'F': Forward, P = P(z-1)*...*P(2)*P(1) +* = 'B': Backward, P = P(1)*P(2)*...*P(z-1) +* +* M (input) INTEGER +* The number of rows of the matrix A. If m <= 1, an immediate +* return is effected. +* +* N (input) INTEGER +* The number of columns of the matrix A. If n <= 1, an +* immediate return is effected. +* +* C (input) REAL array, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* The cosines c(k) of the plane rotations. +* +* S (input) REAL array, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* The sines s(k) of the plane rotations. The 2-by-2 plane +* rotation part of the matrix P(k), R(k), has the form +* R(k) = ( c(k) s(k) ) +* ( -s(k) c(k) ). +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* The M-by-N matrix A. On exit, A is overwritten by P*A if +* SIDE = 'L' or by A*P**T if SIDE = 'R'. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M).
+* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J + REAL CTEMP, STEMP + COMPLEX TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN + INFO = 1 + ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT, + $ 'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN + INFO = 2 + ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) ) + $ THEN + INFO = 3 + ELSE IF( M.LT.0 ) THEN + INFO = 4 + ELSE IF( N.LT.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLASR ', INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form P * A +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 10 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 40 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 30 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 30 CONTINUE + END IF + 40 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 60 J = 2, M + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. 
( STEMP.NE.ZERO ) ) THEN + DO 50 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 80 J = M, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 70 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 100 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 90 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 120 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 110 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form A * P' +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 140 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 130 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 160 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. 
( STEMP.NE.ZERO ) ) THEN + DO 150 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 180 J = 2, N + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 170 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 170 CONTINUE + END IF + 180 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 200 J = N, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 190 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 190 CONTINUE + END IF + 200 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 220 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 210 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 210 CONTINUE + END IF + 220 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 240 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 230 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 230 CONTINUE + END IF + 240 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CLASR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/classq.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/classq.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,101 @@ + SUBROUTINE CLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. 
+ INTEGER INCX, N + REAL SCALE, SUMSQ +* .. +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* Purpose +* ======= +* +* CLASSQ returns the values scl and ssq such that +* +* ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +* +* where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is +* assumed to be at least unity and the value of ssq will then satisfy +* +* 1.0 .le. ssq .le. ( sumsq + 2*n ). +* +* scale is assumed to be non-negative and scl returns the value +* +* scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), +* i +* +* scale and sumsq must be supplied in SCALE and SUMSQ respectively. +* SCALE and SUMSQ are overwritten by scl and ssq respectively. +* +* The routine makes only one pass through the vector X. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements to be used from the vector X. +* +* X (input) COMPLEX array, dimension (N) +* The vector x as described above. +* x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +* +* INCX (input) INTEGER +* The increment between successive values of the vector X. +* INCX > 0. +* +* SCALE (input/output) REAL +* On entry, the value scale in the equation above. +* On exit, SCALE is overwritten with the value scl . +* +* SUMSQ (input/output) REAL +* On entry, the value sumsq in the equation above. +* On exit, SUMSQ is overwritten with the value ssq . +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER IX + REAL TEMP1 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL +* .. +* .. Executable Statements .. 
+* + IF( N.GT.0 ) THEN + DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX + IF( REAL( X( IX ) ).NE.ZERO ) THEN + TEMP1 = ABS( REAL( X( IX ) ) ) + IF( SCALE.LT.TEMP1 ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 + SCALE = TEMP1 + ELSE + SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 + END IF + END IF + IF( AIMAG( X( IX ) ).NE.ZERO ) THEN + TEMP1 = ABS( AIMAG( X( IX ) ) ) + IF( SCALE.LT.TEMP1 ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 + SCALE = TEMP1 + ELSE + SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 + END IF + END IF + 10 CONTINUE + END IF +* + RETURN +* +* End of CLASSQ +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/claswp.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/claswp.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,119 @@ + SUBROUTINE CLASWP( N, A, LDA, K1, K2, IPIV, INCX ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLASWP performs a series of row interchanges on the matrix A. +* One row interchange is initiated for each of rows K1 through K2 of A. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of columns of the matrix A. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the matrix of column dimension N to which the row +* interchanges will be applied. +* On exit, the permuted matrix. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* +* K1 (input) INTEGER +* The first element of IPIV for which a row interchange will +* be done. +* +* K2 (input) INTEGER +* The last element of IPIV for which a row interchange will +* be done. +* +* IPIV (input) INTEGER array, dimension (K2*abs(INCX)) +* The vector of pivot indices. Only the elements in positions +* K1 through K2 of IPIV are accessed. 
+* IPIV(K) = L implies rows K and L are to be interchanged. +* +* INCX (input) INTEGER +* The increment between successive values of IPIV. If INCX +* is negative, the pivots are applied in reverse order. +* +* Further Details +* =============== +* +* Modified by +* R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, I2, INC, IP, IX, IX0, J, K, N32 + COMPLEX TEMP +* .. +* .. Executable Statements .. +* +* Interchange row I with row IPIV(I) for each of rows K1 through K2. +* + IF( INCX.GT.0 ) THEN + IX0 = K1 + I1 = K1 + I2 = K2 + INC = 1 + ELSE IF( INCX.LT.0 ) THEN + IX0 = 1 + ( 1-K2 )*INCX + I1 = K2 + I2 = K1 + INC = -1 + ELSE + RETURN + END IF +* + N32 = ( N / 32 )*32 + IF( N32.NE.0 ) THEN + DO 30 J = 1, N32, 32 + IX = IX0 + DO 20 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 10 K = J, J + 31 + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 10 CONTINUE + END IF + IX = IX + INCX + 20 CONTINUE + 30 CONTINUE + END IF + IF( N32.NE.N ) THEN + N32 = N32 + 1 + IX = IX0 + DO 50 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 40 K = N32, N + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 40 CONTINUE + END IF + IX = IX + INCX + 50 CONTINUE + END IF +* + RETURN +* +* End of CLASWP +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clatbs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clatbs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,908 @@ + SUBROUTINE CLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, + $ SCALE, CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, KD, LDAB, N + REAL SCALE +* .. +* .. Array Arguments ..
+ REAL CNORM( * ) + COMPLEX AB( LDAB, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CLATBS solves one of the triangular systems +* +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +* +* with scaling to prevent overflow, where A is an upper or lower +* triangular band matrix. Here A' denotes the transpose of A, x and b +* are n-element vectors, and s is a scaling factor, usually less than +* or equal to 1, chosen so that the components of x will be less than +* the overflow threshold. If the unscaled problem will not cause +* overflow, the Level 2 BLAS routine CTBSV is called. If the matrix A +* is singular (A(j,j) = 0 for some j), then s is set to 0 and a +* non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A**T * x = s*b (Transpose) +* = 'C': Solve A**H * x = s*b (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of subdiagonals or superdiagonals in the +* triangular matrix A. KD >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first KD+1 rows of the array. 
The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* X (input/output) COMPLEX array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) REAL +* The scaling factor s for the triangular system +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) REAL array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, CTBSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. 
+* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTBSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A**T *x = b or +* A**H *x = b. The basic algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call CTBSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0, + $ TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. 
+ LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST, JLEN, MAIND + REAL BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, + $ XBND, XJ, XMAX + COMPLEX CSUMJ, TJJS, USCAL, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX, ISAMAX + REAL SCASUM, SLAMCH + COMPLEX CDOTC, CDOTU, CLADIV + EXTERNAL LSAME, ICAMAX, ISAMAX, SCASUM, SLAMCH, CDOTC, + $ CDOTU, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CSSCAL, CTBSV, SLABAD, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1, CABS2 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) + CABS2( ZDUM ) = ABS( REAL( ZDUM ) / 2. ) + + $ ABS( AIMAG( ZDUM ) / 2. ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( KD.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLATBS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. 
+* + DO 10 J = 1, N + JLEN = MIN( KD, J-1 ) + CNORM( J ) = SCASUM( JLEN, AB( KD+1-JLEN, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) THEN + CNORM( J ) = SCASUM( JLEN, AB( 2, J ), 1 ) + ELSE + CNORM( J ) = ZERO + END IF + 20 CONTINUE + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM/2. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM*HALF ) THEN + TSCAL = ONE + ELSE + TSCAL = HALF / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine CTBSV can be used. +* + XMAX = ZERO + DO 30 J = 1, N + XMAX = MAX( XMAX, CABS2( X( J ) ) ) + 30 CONTINUE + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = KD + 1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 60 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* + TJJS = AB( MAIND, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = G(j-1) / abs(A(j,j)) +* + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF +* + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 40 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. 
+* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 50 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b or A**H * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = KD + 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 90 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* + TJJS = AB( MAIND, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF + 70 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 80 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 80 CONTINUE + END IF + 90 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL CTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. 
+* + IF( XMAX.GT.BIGNUM*HALF ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = ( BIGNUM*HALF ) / XMAX + CALL CSSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + ELSE + XMAX = XMAX*TWO + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 105 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 100 I = 1, N + X( I ) = ZERO + 100 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 105 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. 
+* + CALL CSSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - +* x(j)* A(max(1,j-kd):j-1,j) +* + JLEN = MIN( KD, J-1 ) + CALL CAXPY( JLEN, -X( J )*TSCAL, + $ AB( KD+1-JLEN, J ), 1, X( J-JLEN ), 1 ) + I = ICAMAX( J-1, X, 1 ) + XMAX = CABS1( X( I ) ) + END IF + ELSE IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - +* x(j) * A(j+1:min(j+kd,n),j) +* + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ CALL CAXPY( JLEN, -X( J )*TSCAL, AB( 2, J ), 1, + $ X( J+1 ), 1 ) + I = J + ICAMAX( N-J, X( J+1 ), 1 ) + XMAX = CABS1( X( I ) ) + END IF + 110 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * x = b +* + DO 150 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTU to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + CSUMJ = CDOTU( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.1 ) + $ CSUMJ = CDOTU( JLEN, AB( 2, J ), 1, X( J+1 ), + $ 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. 
+* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 120 I = 1, JLEN + CSUMJ = CSUMJ + ( AB( KD+I-JLEN, J )*USCAL )* + $ X( J-JLEN-1+I ) + 120 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 130 I = 1, JLEN + CSUMJ = CSUMJ + ( AB( I+1, J )*USCAL )*X( J+I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 145 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**T *x = 0. +* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 145 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 150 CONTINUE +* + ELSE +* +* Solve A**H * x = b +* + DO 190 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). 
+* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = CONJG( AB( MAIND, J ) )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTC to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + CSUMJ = CDOTC( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.1 ) + $ CSUMJ = CDOTC( JLEN, AB( 2, J ), 1, X( J+1 ), + $ 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 160 I = 1, JLEN + CSUMJ = CSUMJ + ( CONJG( AB( KD+I-JLEN, J ) )* + $ USCAL )*X( J-JLEN-1+I ) + 160 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 170 I = 1, JLEN + CSUMJ = CSUMJ + ( CONJG( AB( I+1, J ) )*USCAL )* + $ X( J+I ) + 170 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = CONJG( AB( MAIND, J ) )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 185 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. 
+* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**H *x = 0. +* + DO 180 I = 1, N + X( I ) = ZERO + 180 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 185 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 190 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of CLATBS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clatrd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clatrd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,279 @@ + SUBROUTINE CLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. + REAL E( * ) + COMPLEX A( LDA, * ), TAU( * ), W( LDW, * ) +* .. +* +* Purpose +* ======= +* +* CLATRD reduces NB rows and columns of a complex Hermitian matrix A to +* Hermitian tridiagonal form by a unitary similarity +* transformation Q' * A * Q, and returns the matrices V and W which are +* needed to apply the transformation to the unreduced part of A. +* +* If UPLO = 'U', CLATRD reduces the last NB rows and columns of a +* matrix, of which the upper triangle is supplied; +* if UPLO = 'L', CLATRD reduces the first NB rows and columns of a +* matrix, of which the lower triangle is supplied. +* +* This is an auxiliary routine called by CHETRD. 
+* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. +* +* NB (input) INTEGER +* The number of rows and columns to be reduced. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit: +* if UPLO = 'U', the last NB columns have been reduced to +* tridiagonal form, with the diagonal elements overwriting +* the diagonal elements of A; the elements above the diagonal +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors; +* if UPLO = 'L', the first NB columns have been reduced to +* tridiagonal form, with the diagonal elements overwriting +* the diagonal elements of A; the elements below the diagonal +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* E (output) REAL array, dimension (N-1) +* If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal +* elements of the last NB columns of the reduced matrix; +* if UPLO = 'L', E(1:nb) contains the subdiagonal elements of +* the first NB columns of the reduced matrix. +* +* TAU (output) COMPLEX array, dimension (N-1) +* The scalar factors of the elementary reflectors, stored in +* TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. +* See Further Details. 
+* +* W (output) COMPLEX array, dimension (LDW,NB) +* The n-by-nb matrix W required to update the unreduced part +* of A. +* +* LDW (input) INTEGER +* The leading dimension of the array W. LDW >= max(1,N). +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n) H(n-1) . . . H(n-nb+1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), +* and tau in TAU(i-1). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(nb). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +* and tau in TAU(i). +* +* The elements of the vectors v together form the n-by-nb matrix V +* which is needed, with W, to apply the transformation to the unreduced +* part of the matrix, using a Hermitian rank-2k update of the form: +* A := A - V*W' - W*V'. +* +* The contents of A on exit are illustrated by the following examples +* with n = 5 and nb = 2: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( a a a v4 v5 ) ( d ) +* ( a a v4 v5 ) ( 1 d ) +* ( a 1 v5 ) ( v1 1 a ) +* ( d 1 ) ( v1 v2 a a ) +* ( d ) ( v1 v2 a a a ) +* +* where d denotes a diagonal element of the reduced matrix, a denotes +* an element of the original matrix that is unchanged, and vi denotes +* an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE, HALF + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ), + $ HALF = ( 0.5E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IW + COMPLEX ALPHA +* .. +* .. External Subroutines .. 
+ EXTERNAL CAXPY, CGEMV, CHEMV, CLACGV, CLARFG, CSCAL +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, REAL +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Reduce last NB columns of upper triangle +* + DO 10 I = N, N - NB + 1, -1 + IW = I - N + NB + IF( I.LT.N ) THEN +* +* Update A(1:i,i) +* + A( I, I ) = REAL( A( I, I ) ) + CALL CLACGV( N-I, W( I, IW+1 ), LDW ) + CALL CGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ), + $ LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 ) + CALL CLACGV( N-I, W( I, IW+1 ), LDW ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + CALL CGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ), + $ LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + A( I, I ) = REAL( A( I, I ) ) + END IF + IF( I.GT.1 ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(1:i-2,i) +* + ALPHA = A( I-1, I ) + CALL CLARFG( I-1, ALPHA, A( 1, I ), 1, TAU( I-1 ) ) + E( I-1 ) = ALPHA + A( I-1, I ) = ONE +* +* Compute W(1:i-1,i) +* + CALL CHEMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1, + $ ZERO, W( 1, IW ), 1 ) + IF( I.LT.N ) THEN + CALL CGEMV( 'Conjugate transpose', I-1, N-I, ONE, + $ W( 1, IW+1 ), LDW, A( 1, I ), 1, ZERO, + $ W( I+1, IW ), 1 ) + CALL CGEMV( 'No transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + CALL CGEMV( 'Conjugate transpose', I-1, N-I, ONE, + $ A( 1, I+1 ), LDA, A( 1, I ), 1, ZERO, + $ W( I+1, IW ), 1 ) + CALL CGEMV( 'No transpose', I-1, N-I, -ONE, + $ W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + END IF + CALL CSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 ) + ALPHA = -HALF*TAU( I-1 )*CDOTC( I-1, W( 1, IW ), 1, + $ A( 1, I ), 1 ) + CALL CAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 ) + END IF +* + 10 CONTINUE + ELSE +* +* Reduce first NB columns of lower triangle +* + DO 20 I = 1, NB +* +* 
Update A(i:n,i) +* + A( I, I ) = REAL( A( I, I ) ) + CALL CLACGV( I-1, W( I, 1 ), LDW ) + CALL CGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 ) + CALL CLACGV( I-1, W( I, 1 ), LDW ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + CALL CGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ), + $ LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + A( I, I ) = REAL( A( I, I ) ) + IF( I.LT.N ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:n,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Compute W(i+1:n,i) +* + CALL CHEMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ W( I+1, 1 ), LDW, A( I+1, I ), 1, ZERO, + $ W( 1, I ), 1 ) + CALL CGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO, + $ W( 1, I ), 1 ) + CALL CGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ), + $ LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL CSCAL( N-I, TAU( I ), W( I+1, I ), 1 ) + ALPHA = -HALF*TAU( I )*CDOTC( N-I, W( I+1, I ), 1, + $ A( I+1, I ), 1 ) + CALL CAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 ) + END IF +* + 20 CONTINUE + END IF +* + RETURN +* +* End of CLATRD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clatrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clatrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,879 @@ + SUBROUTINE CLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, LDA, N + REAL SCALE +* .. +* .. 
Array Arguments .. + REAL CNORM( * ) + COMPLEX A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CLATRS solves one of the triangular systems +* +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +* +* with scaling to prevent overflow. Here A is an upper or lower +* triangular matrix, A**T denotes the transpose of A, A**H denotes the +* conjugate transpose of A, x and b are n-element vectors, and s is a +* scaling factor, usually less than or equal to 1, chosen so that the +* components of x will be less than the overflow threshold. If the +* unscaled problem will not cause overflow, the Level 2 BLAS routine +* CTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), +* then s is set to 0 and a non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A**T * x = s*b (Transpose) +* = 'C': Solve A**H * x = s*b (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading n by n +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. 
If UPLO = 'L', the leading n by n lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max (1,N). +* +* X (input/output) COMPLEX array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) REAL +* The scaling factor s for the triangular system +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) REAL array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, CTRSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. 
The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTRSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A**T *x = b or +* A**H *x = b. The basic algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. 
+* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call CTRSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0, + $ TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST + REAL BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, + $ XBND, XJ, XMAX + COMPLEX CSUMJ, TJJS, USCAL, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX, ISAMAX + REAL SCASUM, SLAMCH + COMPLEX CDOTC, CDOTU, CLADIV + EXTERNAL LSAME, ICAMAX, ISAMAX, SCASUM, SLAMCH, CDOTC, + $ CDOTU, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CSSCAL, CTRSV, SLABAD, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1, CABS2 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) + CABS2( ZDUM ) = ABS( REAL( ZDUM ) / 2. ) + + $ ABS( AIMAG( ZDUM ) / 2. ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. 
+ $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLATRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + CNORM( J ) = SCASUM( J-1, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N - 1 + CNORM( J ) = SCASUM( N-J, A( J+1, J ), 1 ) + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM/2. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM*HALF ) THEN + TSCAL = ONE + ELSE + TSCAL = HALF / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine CTRSV can be used. +* + XMAX = ZERO + DO 30 J = 1, N + XMAX = MAX( XMAX, CABS2( X( J ) ) ) + 30 CONTINUE + XBND = XMAX +* + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 60 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. 
+* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* + TJJS = A( J, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = G(j-1) / abs(A(j,j)) +* + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF +* + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 40 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 50 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b or A**H * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 90 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* + TJJS = A( J, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF + 70 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. 
+* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 80 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 80 CONTINUE + END IF + 90 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL CTRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM*HALF ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = ( BIGNUM*HALF ) / XMAX + CALL CSSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + ELSE + XMAX = XMAX*TWO + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 105 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. 
+* + REC = REC / CNORM( J ) + END IF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 100 I = 1, N + X( I ) = ZERO + 100 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 105 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL CSSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL CAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X, + $ 1 ) + I = ICAMAX( J-1, X, 1 ) + XMAX = CABS1( X( I ) ) + END IF + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL CAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1, + $ X( J+1 ), 1 ) + I = J + ICAMAX( N-J, X( J+1 ), 1 ) + XMAX = CABS1( X( I ) ) + END IF + END IF + 110 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * x = b +* + DO 150 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. 
+* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTU to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = CDOTU( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = CDOTU( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 120 I = 1, J - 1 + CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I ) + 120 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 130 I = J + 1, N + CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 145 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**T *x = 0. 
+* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 145 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 150 CONTINUE +* + ELSE +* +* Solve A**H * x = b +* + DO 190 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = CONJG( A( J, J ) )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTC to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = CDOTC( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = CDOTC( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 160 I = 1, J - 1 + CSUMJ = CSUMJ + ( CONJG( A( I, J ) )*USCAL )* + $ X( I ) + 160 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 170 I = J + 1, N + CSUMJ = CSUMJ + ( CONJG( A( I, J ) )*USCAL )* + $ X( I ) + 170 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. 
+* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = CONJG( A( J, J ) )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 185 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**H *x = 0. +* + DO 180 I = 1, N + X( I ) = ZERO + 180 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 185 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 190 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of CLATRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clatrz.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clatrz.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,133 @@ + SUBROUTINE CLATRZ( M, N, L, A, LDA, TAU, WORK ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER L, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. 
+* +* Purpose +* ======= +* +* CLATRZ factors the M-by-(M+L) complex upper trapezoidal matrix +* [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z by means +* of unitary transformations, where Z is an (M+L)-by-(M+L) unitary +* matrix and, R and A1 are M-by-M upper triangular matrices. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing the +* meaningful part of the Householder vectors. N-M >= L >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements N-L+1 to +* N of the first M rows of A, with the array TAU, represent the +* unitary matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace) COMPLEX array, dimension (M) +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), which is used to introduce zeros into +* the ( m - k + 1 )th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an l element vector. tau and z( k ) +* are chosen to annihilate the elements of the kth row of A2. 
+* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A2, such that the elements of z( k ) are +* in a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A1. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARFG, CLARZ +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + DO 20 I = M, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* [ A(i,i) A(i,n-l+1:n) ] +* + CALL CLACGV( L, A( I, N-L+1 ), LDA ) + ALPHA = CONJG( A( I, I ) ) + CALL CLARFG( L+1, ALPHA, A( I, N-L+1 ), LDA, TAU( I ) ) + TAU( I ) = CONJG( TAU( I ) ) +* +* Apply H(i) to A(1:i-1,i:n) from the right +* + CALL CLARZ( 'Right', I-1, N-I+1, L, A( I, N-L+1 ), LDA, + $ CONJG( TAU( I ) ), A( 1, I ), LDA, WORK ) + A( I, I ) = CONJG( ALPHA ) +* + 20 CONTINUE +* + RETURN +* +* End of CLATRZ +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clauu2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clauu2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,143 @@ + SUBROUTINE CLAUU2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. 
+* +* Purpose +* ======= +* +* CLAUU2 computes the product U * U' or L' * L, where the triangular +* factor U or L is stored in the upper or lower triangular part of +* the array A. +* +* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +* overwriting the factor U in A. +* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +* overwriting the factor L in A. +* +* This is the unblocked form of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the triangular factor stored in the array A +* is upper or lower triangular: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the triangular factor U or L. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the triangular factor U or L. +* On exit, if UPLO = 'U', the upper triangle of A is +* overwritten with the upper triangle of the product U * U'; +* if UPLO = 'L', the lower triangle of A is overwritten with +* the lower triangle of the product L' * L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CSSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. 
.NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLAUU2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the product U * U'. +* + DO 10 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = AII*AII + REAL( CDOTC( N-I, A( I, I+1 ), LDA, + $ A( I, I+1 ), LDA ) ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + CALL CGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, CMPLX( AII ), + $ A( 1, I ), 1 ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + ELSE + CALL CSSCAL( I, AII, A( 1, I ), 1 ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the product L' * L. +* + DO 20 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = AII*AII + REAL( CDOTC( N-I, A( I+1, I ), 1, + $ A( I+1, I ), 1 ) ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + CALL CGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, + $ CMPLX( AII ), A( I, 1 ), LDA ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + ELSE + CALL CSSCAL( I, AII, A( I, 1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of CLAUU2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/clauum.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/clauum.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,160 @@ + SUBROUTINE CLAUUM( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLAUUM computes the product U * U' or L' * L, where the triangular +* factor U or L is stored in the upper or lower triangular part of +* the array A. 
+* +* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +* overwriting the factor U in A. +* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +* overwriting the factor L in A. +* +* This is the blocked form of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the triangular factor stored in the array A +* is upper or lower triangular: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the triangular factor U or L. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the triangular factor U or L. +* On exit, if UPLO = 'U', the upper triangle of A is +* overwritten with the upper triangle of the product U * U'; +* if UPLO = 'L', the lower triangle of A is overwritten with +* the lower triangle of the product L' * L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CHERK, CLAUU2, CTRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. 
.NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLAUUM', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CLAUUM', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL CLAUU2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute the product U * U'. +* + DO 10 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Non-unit', I-1, IB, CONE, A( I, I ), LDA, + $ A( 1, I ), LDA ) + CALL CLAUU2( 'Upper', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ I-1, IB, N-I-IB+1, CONE, A( 1, I+IB ), + $ LDA, A( I, I+IB ), LDA, CONE, A( 1, I ), + $ LDA ) + CALL CHERK( 'Upper', 'No transpose', IB, N-I-IB+1, + $ ONE, A( I, I+IB ), LDA, ONE, A( I, I ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the product L' * L. 
+* + DO 20 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL CTRMM( 'Left', 'Lower', 'Conjugate transpose', + $ 'Non-unit', IB, I-1, CONE, A( I, I ), LDA, + $ A( I, 1 ), LDA ) + CALL CLAUU2( 'Lower', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL CGEMM( 'Conjugate transpose', 'No transpose', IB, + $ I-1, N-I-IB+1, CONE, A( I+IB, I ), LDA, + $ A( I+IB, 1 ), LDA, CONE, A( I, 1 ), LDA ) + CALL CHERK( 'Lower', 'Conjugate transpose', IB, + $ N-I-IB+1, ONE, A( I+IB, I ), LDA, ONE, + $ A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of CLAUUM +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cpbcon.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cpbcon.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,198 @@ + SUBROUTINE CPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK, + $ RWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* Modified to call CLACN2 in place of CLACON, 10 Feb 03, SJH. +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CPBCON estimates the reciprocal of the condition number (in the +* 1-norm) of a complex Hermitian positive definite band matrix using +* the Cholesky factorization A = U**H*U or A = L*L**H computed by +* CPBTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor stored in AB; +* = 'L': Lower triangular factor stored in AB. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. 
+* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H of the band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of U or L is +* stored in the j-th column of the array AB as follows: +* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* ANORM (input) REAL +* The 1-norm (or infinity-norm) of the Hermitian band matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CLATBS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. 
.NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL CLATBS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KD, AB, LDAB, WORK, SCALEL, RWORK, + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL CLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, RWORK, INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL CLATBS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL CLATBS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KD, AB, LDAB, WORK, SCALEU, RWORK, + $ INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. 
+* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE +* + RETURN +* +* End of CPBCON +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cpbtf2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cpbtf2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,200 @@ + SUBROUTINE CPBTF2( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CPBTF2 computes the Cholesky factorization of a complex Hermitian +* positive definite band matrix A. +* +* The factorization has the form +* A = U' * U , if UPLO = 'U', or +* A = L * L', if UPLO = 'L', +* where U is an upper triangular matrix, U' is the conjugate transpose +* of U, and L is lower triangular. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). 
+* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U'*U or A = L*L' of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite, and the factorization could not be +* completed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KN + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHER, CLACGV, CSSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. 
.NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( AB( KD+1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( KD+1, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ +* +* Compute elements J+1:J+KN of row J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL CSSCAL( KN, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL CLACGV( KN, AB( KD, J+1 ), KLD ) + CALL CHER( 'Upper', KN, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + CALL CLACGV( KN, AB( KD, J+1 ), KLD ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( AB( 1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( 1, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ +* +* Compute elements J+1:J+KN of column J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL CSSCAL( KN, ONE / AJJ, AB( 2, J ), 1 ) + CALL CHER( 'Lower', KN, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 20 CONTINUE + END IF + RETURN +* + 30 CONTINUE + INFO = J + RETURN +* +* End of CPBTF2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cpbtrf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cpbtrf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,371 @@ + SUBROUTINE CPBTRF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. 
+* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CPBTRF computes the Cholesky factorization of a complex Hermitian +* positive definite band matrix A. +* +* The factorization has the form +* A = U**H * U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U**H*U or A = L*L**H of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. 
+* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* Contributed by +* Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 32, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, IB, II, J, JJ, NB +* .. +* .. Local Arrays .. + COMPLEX WORK( LDWORK, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CHERK, CPBTF2, CPOTF2, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( UPLO, 'U' ) ) .AND. 
+ $ ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'CPBTRF', UPLO, N, KD, -1, -1 ) +* +* The block size must not exceed the semi-bandwidth KD, and must not +* exceed the limit set by the size of the local array WORK. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KD ) THEN +* +* Use unblocked code +* + CALL CPBTF2( UPLO, N, KD, AB, LDAB, INFO ) + ELSE +* +* Use blocked code +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Compute the Cholesky factorization of a Hermitian band +* matrix, given the upper triangle of the matrix in band +* storage. +* +* Zero the upper triangle of the work array. +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 70 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL CPOTF2( UPLO, IB, AB( KD+1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 A12 A13 +* A22 A23 +* A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A12, A22 and +* A23 are empty if IB = KD. The upper triangle of A13 +* lies outside the band. 
+* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A12 +* + CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', IB, I2, CONE, + $ AB( KD+1, I ), LDAB-1, + $ AB( KD+1-IB, I+IB ), LDAB-1 ) +* +* Update A22 +* + CALL CHERK( 'Upper', 'Conjugate transpose', I2, IB, + $ -ONE, AB( KD+1-IB, I+IB ), LDAB-1, ONE, + $ AB( KD+1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array. +* + DO 40 JJ = 1, I3 + DO 30 II = JJ, IB + WORK( II, JJ ) = AB( II-JJ+1, JJ+I+KD-1 ) + 30 CONTINUE + 40 CONTINUE +* +* Update A13 (in the work array). +* + CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', IB, I3, CONE, + $ AB( KD+1, I ), LDAB-1, WORK, LDWORK ) +* +* Update A23 +* + IF( I2.GT.0 ) + $ CALL CGEMM( 'Conjugate transpose', + $ 'No transpose', I2, I3, IB, -CONE, + $ AB( KD+1-IB, I+IB ), LDAB-1, WORK, + $ LDWORK, CONE, AB( 1+IB, I+KD ), + $ LDAB-1 ) +* +* Update A33 +* + CALL CHERK( 'Upper', 'Conjugate transpose', I3, IB, + $ -ONE, WORK, LDWORK, ONE, + $ AB( KD+1, I+KD ), LDAB-1 ) +* +* Copy the lower triangle of A13 back into place. +* + DO 60 JJ = 1, I3 + DO 50 II = JJ, IB + AB( II-JJ+1, JJ+I+KD-1 ) = WORK( II, JJ ) + 50 CONTINUE + 60 CONTINUE + END IF + END IF + 70 CONTINUE + ELSE +* +* Compute the Cholesky factorization of a Hermitian band +* matrix, given the lower triangle of the matrix in band +* storage. +* +* Zero the lower triangle of the work array. +* + DO 90 J = 1, NB + DO 80 I = J + 1, NB + WORK( I, J ) = ZERO + 80 CONTINUE + 90 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 140 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL CPOTF2( UPLO, IB, AB( 1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. 
+* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 +* A21 A22 +* A31 A32 A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A21, A22 and +* A32 are empty if IB = KD. The lower triangle of A31 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A21 +* + CALL CTRSM( 'Right', 'Lower', + $ 'Conjugate transpose', 'Non-unit', I2, + $ IB, CONE, AB( 1, I ), LDAB-1, + $ AB( 1+IB, I ), LDAB-1 ) +* +* Update A22 +* + CALL CHERK( 'Lower', 'No transpose', I2, IB, -ONE, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the upper triangle of A31 into the work array. +* + DO 110 JJ = 1, IB + DO 100 II = 1, MIN( JJ, I3 ) + WORK( II, JJ ) = AB( KD+1-JJ+II, JJ+I-1 ) + 100 CONTINUE + 110 CONTINUE +* +* Update A31 (in the work array). +* + CALL CTRSM( 'Right', 'Lower', + $ 'Conjugate transpose', 'Non-unit', I3, + $ IB, CONE, AB( 1, I ), LDAB-1, WORK, + $ LDWORK ) +* +* Update A32 +* + IF( I2.GT.0 ) + $ CALL CGEMM( 'No transpose', + $ 'Conjugate transpose', I3, I2, IB, + $ -CONE, WORK, LDWORK, AB( 1+IB, I ), + $ LDAB-1, CONE, AB( 1+KD-IB, I+IB ), + $ LDAB-1 ) +* +* Update A33 +* + CALL CHERK( 'Lower', 'No transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( 1, I+KD ), + $ LDAB-1 ) +* +* Copy the upper triangle of A31 back into place. 
+* + DO 130 JJ = 1, IB + DO 120 II = 1, MIN( JJ, I3 ) + AB( KD+1-JJ+II, JJ+I-1 ) = WORK( II, JJ ) + 120 CONTINUE + 130 CONTINUE + END IF + END IF + 140 CONTINUE + END IF + END IF + RETURN +* + 150 CONTINUE + RETURN +* +* End of CPBTRF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cpbtrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cpbtrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,145 @@ + SUBROUTINE CPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CPBTRS solves a system of linear equations A*X = B with a Hermitian +* positive definite band matrix A using the Cholesky factorization +* A = U**H*U or A = L*L**H computed by CPBTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor stored in AB; +* = 'L': Lower triangular factor stored in AB. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H of the band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of U or L is +* stored in the j-th column of the array AB as follows: +* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. 
+* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* + DO 10 J = 1, NRHS +* +* Solve U'*X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'Conjugate transpose', 'Non-unit', N, + $ KD, AB, LDAB, B( 1, J ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L*L'. +* + DO 20 J = 1, NRHS +* +* Solve L*X = B, overwriting B with X. +* + CALL CTBSV( 'Lower', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve L'*X = B, overwriting B with X. 
+* + CALL CTBSV( 'Lower', 'Conjugate transpose', 'Non-unit', N, + $ KD, AB, LDAB, B( 1, J ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of CPBTRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cpocon.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cpocon.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,184 @@ + SUBROUTINE CPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* Modified to call CLACN2 in place of CLACON, 10 Feb 03, SJH. +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CPOCON estimates the reciprocal of the condition number (in the +* 1-norm) of a complex Hermitian positive definite matrix using the +* Cholesky factorization A = U**H*U or A = L*L**H computed by CPOTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H, as computed by CPOTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) REAL +* The 1-norm (or infinity-norm) of the Hermitian matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. 
+* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CLATRS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of inv(A). +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL CLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). 
+* + CALL CLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEU, RWORK, INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL CLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL CLATRS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SCALEU, RWORK, INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of CPOCON +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cpotf2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cpotf2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,174 @@ + SUBROUTINE CPOTF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CPOTF2 computes the Cholesky factorization of a complex Hermitian +* positive definite matrix A. +* +* The factorization has the form +* A = U' * U , if UPLO = 'U', or +* A = L * L', if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. 
N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U'*U or A = L*L'. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite, and the factorization could not be +* completed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CSSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. 
+* + AJJ = REAL( A( J, J ) ) - CDOTC( J-1, A( 1, J ), 1, + $ A( 1, J ), 1 ) + IF( AJJ.LE.ZERO ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J. +* + IF( J.LT.N ) THEN + CALL CLACGV( J-1, A( 1, J ), 1 ) + CALL CGEMV( 'Transpose', J-1, N-J, -CONE, A( 1, J+1 ), + $ LDA, A( 1, J ), 1, CONE, A( J, J+1 ), LDA ) + CALL CLACGV( J-1, A( 1, J ), 1 ) + CALL CSSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( A( J, J ) ) - CDOTC( J-1, A( J, 1 ), LDA, + $ A( J, 1 ), LDA ) + IF( AJJ.LE.ZERO ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J. +* + IF( J.LT.N ) THEN + CALL CLACGV( J-1, A( J, 1 ), LDA ) + CALL CGEMV( 'No transpose', N-J, J-1, -CONE, A( J+1, 1 ), + $ LDA, A( J, 1 ), LDA, CONE, A( J+1, J ), 1 ) + CALL CLACGV( J-1, A( J, 1 ), LDA ) + CALL CSSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of CPOTF2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cpotrf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cpotrf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,186 @@ + SUBROUTINE CPOTRF( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CPOTRF computes the Cholesky factorization of a complex Hermitian +* positive definite matrix A. 
+* +* The factorization has the form +* A = U**H * U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* This is the block version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + COMPLEX CONE + PARAMETER ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CHERK, CPOTF2, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. 
.NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL CPOTF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL CHERK( 'Upper', 'Conjugate transpose', JB, J-1, + $ -ONE, A( 1, J ), LDA, ONE, A( J, J ), LDA ) + CALL CPOTF2( 'Upper', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block row. +* + CALL CGEMM( 'Conjugate transpose', 'No transpose', JB, + $ N-J-JB+1, J-1, -CONE, A( 1, J ), LDA, + $ A( 1, J+JB ), LDA, CONE, A( J, J+JB ), + $ LDA ) + CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', JB, N-J-JB+1, CONE, A( J, J ), + $ LDA, A( J, J+JB ), LDA ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL CHERK( 'Lower', 'No transpose', JB, J-1, -ONE, + $ A( J, 1 ), LDA, ONE, A( J, J ), LDA ) + CALL CPOTF2( 'Lower', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block column. 
+* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ N-J-JB+1, JB, J-1, -CONE, A( J+JB, 1 ), + $ LDA, A( J, 1 ), LDA, CONE, A( J+JB, J ), + $ LDA ) + CALL CTRSM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Non-unit', N-J-JB+1, JB, CONE, A( J, J ), + $ LDA, A( J+JB, J ), LDA ) + END IF + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of CPOTRF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cpotri.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cpotri.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,96 @@ + SUBROUTINE CPOTRI( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CPOTRI computes the inverse of a complex Hermitian positive definite +* matrix A using the Cholesky factorization A = U**H*U or A = L*L**H +* computed by CPOTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H, as computed by +* CPOTRF. +* On exit, the upper or lower triangle of the (Hermitian) +* inverse of A, overwriting the input factor U or L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the (i,i) element of the factor U or L is +* zero, and the inverse could not be computed. 
+* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLAUUM, CTRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL CTRTRI( UPLO, 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* Form inv(U)*inv(U)' or inv(L)'*inv(L). +* + CALL CLAUUM( UPLO, N, A, LDA, INFO ) +* + RETURN +* +* End of CPOTRI +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cpotrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cpotrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,132 @@ + SUBROUTINE CPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CPOTRS solves a system of linear equations A*X = B with a Hermitian +* positive definite matrix A using the Cholesky factorization +* A = U**H*U or A = L*L**H computed by CPOTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. 
+* +* A (input) COMPLEX array, dimension (LDA,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H, as computed by CPOTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* +* Solve U'*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A*X = B where A = L*L'. +* +* Solve L*X = B, overwriting B with X. 
+* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Lower', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, ONE, A, LDA, B, LDB ) + END IF +* + RETURN +* +* End of CPOTRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cptsv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cptsv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,100 @@ + SUBROUTINE CPTSV( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX B( LDB, * ), E( * ) +* .. +* +* Purpose +* ======= +* +* CPTSV computes the solution to a complex system of linear equations +* A*X = B, where A is an N-by-N Hermitian positive definite tridiagonal +* matrix, and X and B are N-by-NRHS matrices. +* +* A is factored as A = L*D*L**H, and the factored form of A is then +* used to solve the system of equations. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. On exit, the n diagonal elements of the diagonal matrix +* D from the factorization A = L*D*L**H. +* +* E (input/output) COMPLEX array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A. On exit, the (n-1) subdiagonal elements of the +* unit bidiagonal factor L from the L*D*L**H factorization of +* A. E can also be regarded as the superdiagonal of the unit +* bidiagonal factor U from the U**H*D*U factorization of A. 
+* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the solution has not been +* computed. The factorization has not been completed +* unless i = N. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL CPTTRF, CPTTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPTSV ', -INFO ) + RETURN + END IF +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + CALL CPTTRF( N, D, E, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CPTTRS( 'Lower', N, NRHS, D, E, B, LDB, INFO ) + END IF + RETURN +* +* End of CPTSV +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cpttrf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cpttrf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,168 @@ + SUBROUTINE CPTTRF( N, D, E, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX E( * ) +* .. +* +* Purpose +* ======= +* +* CPTTRF computes the L*D*L' factorization of a complex Hermitian +* positive definite tridiagonal matrix A. The factorization may also +* be regarded as having the form A = U'*D*U. 
+* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. On exit, the n diagonal elements of the diagonal matrix +* D from the L*D*L' factorization of A. +* +* E (input/output) COMPLEX array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A. On exit, the (n-1) subdiagonal elements of the +* unit bidiagonal factor L from the L*D*L' factorization of A. +* E can also be regarded as the superdiagonal of the unit +* bidiagonal factor U from the U'*D*U factorization of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite; if k < N, the factorization could not +* be completed, while if k = N, the factorization was +* completed, but D(N) <= 0. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I4 + REAL EII, EIR, F, G +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, MOD, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'CPTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + I4 = MOD( N-1, 4 ) + DO 10 I = 1, I4 + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 20 + END IF + EIR = REAL( E( I ) ) + EII = AIMAG( E( I ) ) + F = EIR / D( I ) + G = EII / D( I ) + E( I ) = CMPLX( F, G ) + D( I+1 ) = D( I+1 ) - F*EIR - G*EII + 10 CONTINUE +* + DO 110 I = I4+1, N - 4, 4 +* +* Drop out of the loop if d(i) <= 0: the matrix is not positive +* definite. 
+* + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 20 + END IF +* +* Solve for e(i) and d(i+1). +* + EIR = REAL( E( I ) ) + EII = AIMAG( E( I ) ) + F = EIR / D( I ) + G = EII / D( I ) + E( I ) = CMPLX( F, G ) + D( I+1 ) = D( I+1 ) - F*EIR - G*EII +* + IF( D( I+1 ).LE.ZERO ) THEN + INFO = I+1 + GO TO 20 + END IF +* +* Solve for e(i+1) and d(i+2). +* + EIR = REAL( E( I+1 ) ) + EII = AIMAG( E( I+1 ) ) + F = EIR / D( I+1 ) + G = EII / D( I+1 ) + E( I+1 ) = CMPLX( F, G ) + D( I+2 ) = D( I+2 ) - F*EIR - G*EII +* + IF( D( I+2 ).LE.ZERO ) THEN + INFO = I+2 + GO TO 20 + END IF +* +* Solve for e(i+2) and d(i+3). +* + EIR = REAL( E( I+2 ) ) + EII = AIMAG( E( I+2 ) ) + F = EIR / D( I+2 ) + G = EII / D( I+2 ) + E( I+2 ) = CMPLX( F, G ) + D( I+3 ) = D( I+3 ) - F*EIR - G*EII +* + IF( D( I+3 ).LE.ZERO ) THEN + INFO = I+3 + GO TO 20 + END IF +* +* Solve for e(i+3) and d(i+4). +* + EIR = REAL( E( I+3 ) ) + EII = AIMAG( E( I+3 ) ) + F = EIR / D( I+3 ) + G = EII / D( I+3 ) + E( I+3 ) = CMPLX( F, G ) + D( I+4 ) = D( I+4 ) - F*EIR - G*EII + 110 CONTINUE +* +* Check d(n) for positive definiteness. +* + IF( D( N ).LE.ZERO ) + $ INFO = N +* + 20 CONTINUE + RETURN +* +* End of CPTTRF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cpttrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cpttrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,135 @@ + SUBROUTINE CPTTRS( UPLO, N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX B( LDB, * ), E( * ) +* .. +* +* Purpose +* ======= +* +* CPTTRS solves a tridiagonal system of the form +* A * X = B +* using the factorization A = U'*D*U or A = L*D*L' computed by CPTTRF. 
+* D is a diagonal matrix specified in the vector D, U (or L) is a unit +* bidiagonal matrix whose superdiagonal (subdiagonal) is specified in +* the vector E, and X and B are N by NRHS matrices. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the form of the factorization and whether the +* vector E is the superdiagonal of the upper bidiagonal factor +* U or the subdiagonal of the lower bidiagonal factor L. +* = 'U': A = U'*D*U, E is the superdiagonal of U +* = 'L': A = L*D*L', E is the subdiagonal of L +* +* N (input) INTEGER +* The order of the tridiagonal matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* factorization A = U'*D*U or A = L*D*L'. +* +* E (input) COMPLEX array, dimension (N-1) +* If UPLO = 'U', the (n-1) superdiagonal elements of the unit +* bidiagonal factor U from the factorization A = U'*D*U. +* If UPLO = 'L', the (n-1) subdiagonal elements of the unit +* bidiagonal factor L from the factorization A = L*D*L'. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side vectors B for the system of +* linear equations. +* On exit, the solution vectors, X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER IUPLO, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CPTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + UPPER = ( UPLO.EQ.'U' .OR. 
UPLO.EQ.'u' ) + IF( .NOT.UPPER .AND. .NOT.( UPLO.EQ.'L' .OR. UPLO.EQ.'l' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'CPTTRS', UPLO, N, NRHS, -1, -1 ) ) + END IF +* +* Decode UPLO +* + IF( UPPER ) THEN + IUPLO = 1 + ELSE + IUPLO = 0 + END IF +* + IF( NB.GE.NRHS ) THEN + CALL CPTTS2( IUPLO, N, NRHS, D, E, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL CPTTS2( IUPLO, N, JB, D, E, B( 1, J ), LDB ) + 10 CONTINUE + END IF +* + RETURN +* +* End of CPTTRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cptts2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cptts2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,176 @@ + SUBROUTINE CPTTS2( IUPLO, N, NRHS, D, E, B, LDB ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IUPLO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX B( LDB, * ), E( * ) +* .. +* +* Purpose +* ======= +* +* CPTTS2 solves a tridiagonal system of the form +* A * X = B +* using the factorization A = U'*D*U or A = L*D*L' computed by CPTTRF. +* D is a diagonal matrix specified in the vector D, U (or L) is a unit +* bidiagonal matrix whose superdiagonal (subdiagonal) is specified in +* the vector E, and X and B are N by NRHS matrices. +* +* Arguments +* ========= +* +* IUPLO (input) INTEGER +* Specifies the form of the factorization and whether the +* vector E is the superdiagonal of the upper bidiagonal factor +* U or the subdiagonal of the lower bidiagonal factor L. 
+* = 1: A = U'*D*U, E is the superdiagonal of U +* = 0: A = L*D*L', E is the subdiagonal of L +* +* N (input) INTEGER +* The order of the tridiagonal matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* factorization A = U'*D*U or A = L*D*L'. +* +* E (input) COMPLEX array, dimension (N-1) +* If IUPLO = 1, the (n-1) superdiagonal elements of the unit +* bidiagonal factor U from the factorization A = U'*D*U. +* If IUPLO = 0, the (n-1) subdiagonal elements of the unit +* bidiagonal factor L from the factorization A = L*D*L'. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side vectors B for the system of +* linear equations. +* On exit, the solution vectors, X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) + $ CALL CSSCAL( NRHS, 1. / D( 1 ), B, LDB ) + RETURN + END IF +* + IF( IUPLO.EQ.1 ) THEN +* +* Solve A * X = B using the factorization A = U'*D*U, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 5 CONTINUE +* +* Solve U' * x = b. +* + DO 10 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*CONJG( E( I-1 ) ) + 10 CONTINUE +* +* Solve D * U * x = b. +* + DO 20 I = 1, N + B( I, J ) = B( I, J ) / D( I ) + 20 CONTINUE + DO 30 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) - B( I+1, J )*E( I ) + 30 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 5 + END IF + ELSE + DO 60 J = 1, NRHS +* +* Solve U' * x = b. 
+* + DO 40 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*CONJG( E( I-1 ) ) + 40 CONTINUE +* +* Solve D * U * x = b. +* + B( N, J ) = B( N, J ) / D( N ) + DO 50 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - B( I+1, J )*E( I ) + 50 CONTINUE + 60 CONTINUE + END IF + ELSE +* +* Solve A * X = B using the factorization A = L*D*L', +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 65 CONTINUE +* +* Solve L * x = b. +* + DO 70 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 70 CONTINUE +* +* Solve D * L' * x = b. +* + DO 80 I = 1, N + B( I, J ) = B( I, J ) / D( I ) + 80 CONTINUE + DO 90 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) - B( I+1, J )*CONJG( E( I ) ) + 90 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 65 + END IF + ELSE + DO 120 J = 1, NRHS +* +* Solve L * x = b. +* + DO 100 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 100 CONTINUE +* +* Solve D * L' * x = b. +* + B( N, J ) = B( N, J ) / D( N ) + DO 110 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - + $ B( I+1, J )*CONJG( E( I ) ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of CPTTS2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/crot.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/crot.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,91 @@ + SUBROUTINE CROT( N, CX, INCX, CY, INCY, C, S ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + REAL C + COMPLEX S +* .. +* .. Array Arguments .. + COMPLEX CX( * ), CY( * ) +* .. +* +* Purpose +* ======= +* +* CROT applies a plane rotation, where the cos (C) is real and the +* sin (S) is complex, and the vectors CX and CY are complex. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements in the vectors CX and CY. 
+* +* CX (input/output) COMPLEX array, dimension (N) +* On input, the vector X. +* On output, CX is overwritten with C*X + S*Y. +* +* INCX (input) INTEGER +* The increment between successive values of CX. INCX <> 0. +* +* CY (input/output) COMPLEX array, dimension (N) +* On input, the vector Y. +* On output, CY is overwritten with -CONJG(S)*X + C*Y. +* +* INCY (input) INTEGER +* The increment between successive values of CY. INCY <> 0. +* +* C (input) REAL +* S (input) COMPLEX +* C and S define a rotation +* [ C S ] +* [ -conjg(S) C ] +* where C*C + S*CONJG(S) = 1.0. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IX, IY + COMPLEX STEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 .AND. INCY.EQ.1 ) + $ GO TO 20 +* +* Code for unequal increments or equal increments not equal to 1 +* + IX = 1 + IY = 1 + IF( INCX.LT.0 ) + $ IX = ( -N+1 )*INCX + 1 + IF( INCY.LT.0 ) + $ IY = ( -N+1 )*INCY + 1 + DO 10 I = 1, N + STEMP = C*CX( IX ) + S*CY( IY ) + CY( IY ) = C*CY( IY ) - CONJG( S )*CX( IX ) + CX( IX ) = STEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* Code for both increments equal to 1 +* + 20 CONTINUE + DO 30 I = 1, N + STEMP = C*CX( I ) + S*CY( I ) + CY( I ) = C*CY( I ) - CONJG( S )*CX( I ) + CX( I ) = STEMP + 30 CONTINUE + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/csrscl.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/csrscl.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,114 @@ + SUBROUTINE CSRSCL( N, SA, SX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL SA +* .. +* .. Array Arguments .. + COMPLEX SX( * ) +* .. 
+* +* Purpose +* ======= +* +* CSRSCL multiplies an n-element complex vector x by the real scalar +* 1/a. This is done without overflow or underflow as long as +* the final result x/a does not overflow or underflow. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of components of the vector x. +* +* SA (input) REAL +* The scalar a which is used to divide each component of x. +* SA must be >= 0, or the subroutine will divide by zero. +* +* SX (input/output) COMPLEX array, dimension +* (1+(N-1)*abs(INCX)) +* The n-element vector x. +* +* INCX (input) INTEGER +* The increment between successive values of the vector SX. +* > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + REAL BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, SLABAD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Initialize the denominator to SA and the numerator to 1. +* + CDEN = SA + CNUM = ONE +* + 10 CONTINUE + CDEN1 = CDEN*SMLNUM + CNUM1 = CNUM / BIGNUM + IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN +* +* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM. +* + MUL = SMLNUM + DONE = .FALSE. + CDEN = CDEN1 + ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN +* +* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM. +* + MUL = BIGNUM + DONE = .FALSE. + CNUM = CNUM1 + ELSE +* +* Multiply X by CNUM / CDEN and return. +* + MUL = CNUM / CDEN + DONE = .TRUE. 
+ END IF +* +* Scale the vector X by MUL +* + CALL CSSCAL( N, MUL, SX, INCX ) +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of CSRSCL +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/csteqr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/csteqr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,503 @@ + SUBROUTINE CSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ) + COMPLEX Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CSTEQR computes all eigenvalues and, optionally, eigenvectors of a +* symmetric tridiagonal matrix using the implicit QL or QR method. +* The eigenvectors of a full or band complex Hermitian matrix can also +* be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this +* matrix to tridiagonal form. +* +* Arguments +* ========= +* +* COMPZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only. +* = 'V': Compute eigenvalues and eigenvectors of the original +* Hermitian matrix. On entry, Z must contain the +* unitary matrix used to reduce the original matrix +* to tridiagonal form. +* = 'I': Compute eigenvalues and eigenvectors of the +* tridiagonal matrix. Z is initialized to the identity +* matrix. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the diagonal elements of the tridiagonal matrix. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix. +* On exit, E has been destroyed. +* +* Z (input/output) COMPLEX array, dimension (LDZ, N) +* On entry, if COMPZ = 'V', then Z contains the unitary +* matrix used in the reduction to tridiagonal form. 
+* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +* orthonormal eigenvectors of the original Hermitian matrix, +* and if COMPZ = 'I', Z contains the orthonormal eigenvectors +* of the symmetric tridiagonal matrix. +* If COMPZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* eigenvectors are desired, then LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (max(1,2*N-2)) +* If COMPZ = 'N', then WORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm has failed to find all the eigenvalues in +* a total of 30*N iterations; if INFO = i, then i +* elements of E have not converged to zero; on exit, D +* and E contain the elements of a symmetric tridiagonal +* matrix which is unitarily similar to the original +* matrix. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ THREE = 3.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, + $ LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, + $ NM1, NMAXIT + REAL ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, + $ S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST, SLAPY2 + EXTERNAL LSAME, SLAMCH, SLANST, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL CLASET, CLASR, CSWAP, SLAE2, SLAEV2, SLARTG, + $ SLASCL, SLASRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. 
+* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.EQ.2 ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Determine the unit roundoff and over/underflow thresholds. +* + EPS = SLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues and eigenvectors of the tridiagonal +* matrix. +* + IF( ICOMPZ.EQ.2 ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* + NMAXIT = N*MAXIT + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. 
+* + L1 = 1 + NM1 = N - 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 160 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + IF( L1.LE.NM1 ) THEN + DO 20 M = L1, NM1 + TST = ABS( E( M ) ) + IF( TST.EQ.ZERO ) + $ GO TO 30 + IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + END IF + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = SLANST( 'I', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GT.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 40 CONTINUE + IF( L.NE.LEND ) THEN + LENDM1 = LEND - 1 + DO 50 M = L, LENDM1 + TST = ABS( E( M ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ + $ SAFMIN )GO TO 60 + 50 CONTINUE + END IF +* + M = LEND +* + 60 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 80 +* +* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 +* to compute its eigensystem. 
+* + IF( M.EQ.L+1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL SLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) + WORK( L ) = C + WORK( N-1+L ) = S + CALL CLASR( 'R', 'V', 'B', N, 2, WORK( L ), + $ WORK( N-1+L ), Z( 1, L ), LDZ ) + ELSE + CALL SLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 ) + END IF + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L+1 )-P ) / ( TWO*E( L ) ) + R = SLAPY2( G, ONE ) + G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + MM1 = M - 1 + DO 70 I = MM1, L, -1 + F = S*E( I ) + B = C*E( I ) + CALL SLARTG( G, F, C, S, R ) + IF( I.NE.M-1 ) + $ E( I+1 ) = R + G = D( I+1 ) - P + R = ( D( I )-G )*S + TWO*C*B + P = S*R + D( I+1 ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = -S + END IF +* + 70 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = M - L + 1 + CALL CLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ), + $ Z( 1, L ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( L ) = G + GO TO 40 +* +* Eigenvalue found. +* + 80 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 90 CONTINUE + IF( L.NE.LEND ) THEN + LENDP1 = LEND + 1 + DO 100 M = L, LENDP1, -1 + TST = ABS( E( M-1 ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ + $ SAFMIN )GO TO 110 + 100 CONTINUE + END IF +* + M = LEND +* + 110 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 130 +* +* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 +* to compute its eigensystem. 
+* + IF( M.EQ.L-1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL SLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) + WORK( M ) = C + WORK( N-1+M ) = S + CALL CLASR( 'R', 'V', 'F', N, 2, WORK( M ), + $ WORK( N-1+M ), Z( 1, L-1 ), LDZ ) + ELSE + CALL SLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 ) + END IF + D( L-1 ) = RT1 + D( L ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) + R = SLAPY2( G, ONE ) + G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + LM1 = L - 1 + DO 120 I = M, LM1 + F = S*E( I ) + B = C*E( I ) + CALL SLARTG( G, F, C, S, R ) + IF( I.NE.M ) + $ E( I-1 ) = R + G = D( I ) - P + R = ( D( I+1 )-G )*S + TWO*C*B + P = S*R + D( I ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = S + END IF +* + 120 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = L - M + 1 + CALL CLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ), + $ Z( 1, M ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( LM1 ) = G + GO TO 90 +* +* Eigenvalue found. +* + 130 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 +* + END IF +* +* Undo scaling if necessary +* + 140 CONTINUE + IF( ISCALE.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + ELSE IF( ISCALE.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + END IF +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. 
+* + IF( JTOT.EQ.NMAXIT ) THEN + DO 150 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 150 CONTINUE + RETURN + END IF + GO TO 10 +* +* Order eigenvalues and eigenvectors. +* + 160 CONTINUE + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL SLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 180 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 170 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 170 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 180 CONTINUE + END IF + RETURN +* +* End of CSTEQR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/ctrcon.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/ctrcon.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,204 @@ + SUBROUTINE CTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, + $ RWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* Modified to call CLACN2 in place of CLACON, 10 Feb 03, SJH. +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, LDA, N + REAL RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CTRCON estimates the reciprocal of the condition number of a +* triangular matrix A, in either the 1-norm or the infinity-norm. +* +* The norm of A is computed and an estimate is obtained for +* norm(inv(A)), then the reciprocal of the condition number is +* computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. 
+* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, ANORM, SCALE, SMLNUM, XNORM + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL CLANTR, SLAMCH + EXTERNAL LSAME, ICAMAX, CLANTR, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CLATRS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. 
+* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = CLANTR( NORM, UPLO, DIAG, N, N, A, LDA, RWORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL CLATRS( UPLO, 'No transpose', DIAG, NORMIN, N, A, + $ LDA, WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(A'). +* + CALL CLATRS( UPLO, 'Conjugate transpose', DIAG, NORMIN, + $ N, A, LDA, WORK, SCALE, RWORK, INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + XNORM = CABS1( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. 
+* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of CTRCON +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/ctrevc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/ctrevc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,386 @@ + SUBROUTINE CTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, MM, M, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDT, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL RWORK( * ) + COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CTREVC computes some or all of the right and/or left eigenvectors of +* a complex upper triangular matrix T. +* Matrices of this type are produced by the Schur factorization of +* a complex general matrix: A = Q*T*Q**H, as computed by CHSEQR. +* +* The right eigenvector x and the left eigenvector y of T corresponding +* to an eigenvalue w are defined by: +* +* T*x = w*x, (y**H)*T = w*(y**H) +* +* where y**H denotes the conjugate transpose of the vector y. +* The eigenvalues are not input to this routine, but are read directly +* from the diagonal of T. +* +* This routine returns the matrices X and/or Y of right and left +* eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an +* input matrix. If Q is the unitary factor that reduces a matrix A to +* Schur form T, then Q*X and Q*Y are the matrices of right and left +* eigenvectors of A. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. 
+* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute all right and/or left eigenvectors; +* = 'B': compute all right and/or left eigenvectors, +* backtransformed using the matrices supplied in +* VR and/or VL; +* = 'S': compute selected right and/or left eigenvectors, +* as indicated by the logical array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY = 'S', SELECT specifies the eigenvectors to be +* computed. +* The eigenvector corresponding to the j-th eigenvalue is +* computed if SELECT(j) = .TRUE.. +* Not referenced if HOWMNY = 'A' or 'B'. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) COMPLEX array, dimension (LDT,N) +* The upper triangular matrix T. T is modified, but restored +* on exit. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* VL (input/output) COMPLEX array, dimension (LDVL,MM) +* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +* contain an N-by-N matrix Q (usually the unitary matrix Q of +* Schur vectors returned by CHSEQR). +* On exit, if SIDE = 'L' or 'B', VL contains: +* if HOWMNY = 'A', the matrix Y of left eigenvectors of T; +* if HOWMNY = 'B', the matrix Q*Y; +* if HOWMNY = 'S', the left eigenvectors of T specified by +* SELECT, stored consecutively in the columns +* of VL, in the same order as their +* eigenvalues. +* Not referenced if SIDE = 'R'. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1, and if +* SIDE = 'L' or 'B', LDVL >= N. +* +* VR (input/output) COMPLEX array, dimension (LDVR,MM) +* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +* contain an N-by-N matrix Q (usually the unitary matrix Q of +* Schur vectors returned by CHSEQR). 
+* On exit, if SIDE = 'R' or 'B', VR contains: +* if HOWMNY = 'A', the matrix X of right eigenvectors of T; +* if HOWMNY = 'B', the matrix Q*X; +* if HOWMNY = 'S', the right eigenvectors of T specified by +* SELECT, stored consecutively in the columns +* of VR, in the same order as their +* eigenvalues. +* Not referenced if SIDE = 'L'. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1, and if +* SIDE = 'R' or 'B'; LDVR >= N. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR actually +* used to store the eigenvectors. If HOWMNY = 'A' or 'B', M +* is set to N. Each selected eigenvector occupies one +* column. +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The algorithm used in this program is basically backward (forward) +* substitution, with scaling to make the code robust against +* possible overflow. +* +* Each eigenvector is normalized so that the element of largest +* magnitude has magnitude 1; here the magnitude of a complex number +* (x,y) is taken to be |x| + |y|. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CMZERO, CMONE + PARAMETER ( CMZERO = ( 0.0E+0, 0.0E+0 ), + $ CMONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLV, BOTHV, LEFTV, OVER, RIGHTV, SOMEV + INTEGER I, II, IS, J, K, KI + REAL OVFL, REMAX, SCALE, SMIN, SMLNUM, ULP, UNFL + COMPLEX CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SCASUM, SLAMCH + EXTERNAL LSAME, ICAMAX, SCASUM, SLAMCH +* .. +* .. External Subroutines .. 
+ EXTERNAL CCOPY, CGEMV, CLATRS, CSSCAL, SLABAD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + ALLV = LSAME( HOWMNY, 'A' ) + OVER = LSAME( HOWMNY, 'B' ) + SOMEV = LSAME( HOWMNY, 'S' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors. +* + IF( SOMEV ) THEN + M = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE IF( MM.LT.M ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTREVC', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set the constants to control overflow. +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) +* +* Store the diagonal elements of T in working array WORK. +* + DO 20 I = 1, N + WORK( I+N ) = T( I, I ) + 20 CONTINUE +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + RWORK( 1 ) = ZERO + DO 30 J = 2, N + RWORK( J ) = SCASUM( J-1, T( 1, J ), 1 ) + 30 CONTINUE +* + IF( RIGHTV ) THEN +* +* Compute right eigenvectors. 
+* + IS = M + DO 80 KI = N, 1, -1 +* + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 80 + END IF + SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM ) +* + WORK( 1 ) = CMONE +* +* Form right-hand side. +* + DO 40 K = 1, KI - 1 + WORK( K ) = -T( K, KI ) + 40 CONTINUE +* +* Solve the triangular system: +* (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. +* + DO 50 K = 1, KI - 1 + T( K, K ) = T( K, K ) - T( KI, KI ) + IF( CABS1( T( K, K ) ).LT.SMIN ) + $ T( K, K ) = SMIN + 50 CONTINUE +* + IF( KI.GT.1 ) THEN + CALL CLATRS( 'Upper', 'No transpose', 'Non-unit', 'Y', + $ KI-1, T, LDT, WORK( 1 ), SCALE, RWORK, + $ INFO ) + WORK( KI ) = SCALE + END IF +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL CCOPY( KI, WORK( 1 ), 1, VR( 1, IS ), 1 ) +* + II = ICAMAX( KI, VR( 1, IS ), 1 ) + REMAX = ONE / CABS1( VR( II, IS ) ) + CALL CSSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 60 K = KI + 1, N + VR( K, IS ) = CMZERO + 60 CONTINUE + ELSE + IF( KI.GT.1 ) + $ CALL CGEMV( 'N', N, KI-1, CMONE, VR, LDVR, WORK( 1 ), + $ 1, CMPLX( SCALE ), VR( 1, KI ), 1 ) +* + II = ICAMAX( N, VR( 1, KI ), 1 ) + REMAX = ONE / CABS1( VR( II, KI ) ) + CALL CSSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF +* +* Set back the original diagonal elements of T. +* + DO 70 K = 1, KI - 1 + T( K, K ) = WORK( K+N ) + 70 CONTINUE +* + IS = IS - 1 + 80 CONTINUE + END IF +* + IF( LEFTV ) THEN +* +* Compute left eigenvectors. +* + IS = 1 + DO 130 KI = 1, N +* + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 130 + END IF + SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM ) +* + WORK( N ) = CMONE +* +* Form right-hand side. +* + DO 90 K = KI + 1, N + WORK( K ) = -CONJG( T( KI, K ) ) + 90 CONTINUE +* +* Solve the triangular system: +* (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. 
+* + DO 100 K = KI + 1, N + T( K, K ) = T( K, K ) - T( KI, KI ) + IF( CABS1( T( K, K ) ).LT.SMIN ) + $ T( K, K ) = SMIN + 100 CONTINUE +* + IF( KI.LT.N ) THEN + CALL CLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ 'Y', N-KI, T( KI+1, KI+1 ), LDT, + $ WORK( KI+1 ), SCALE, RWORK, INFO ) + WORK( KI ) = SCALE + END IF +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN + CALL CCOPY( N-KI+1, WORK( KI ), 1, VL( KI, IS ), 1 ) +* + II = ICAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1 + REMAX = ONE / CABS1( VL( II, IS ) ) + CALL CSSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) +* + DO 110 K = 1, KI - 1 + VL( K, IS ) = CMZERO + 110 CONTINUE + ELSE + IF( KI.LT.N ) + $ CALL CGEMV( 'N', N, N-KI, CMONE, VL( 1, KI+1 ), LDVL, + $ WORK( KI+1 ), 1, CMPLX( SCALE ), + $ VL( 1, KI ), 1 ) +* + II = ICAMAX( N, VL( 1, KI ), 1 ) + REMAX = ONE / CABS1( VL( II, KI ) ) + CALL CSSCAL( N, REMAX, VL( 1, KI ), 1 ) + END IF +* +* Set back the original diagonal elements of T. +* + DO 120 K = KI + 1, N + T( K, K ) = WORK( K+N ) + 120 CONTINUE +* + IS = IS + 1 + 130 CONTINUE + END IF +* + RETURN +* +* End of CTREVC +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/ctrexc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/ctrexc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,161 @@ + SUBROUTINE CTREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER COMPQ + INTEGER IFST, ILST, INFO, LDQ, LDT, N +* .. +* .. Array Arguments .. + COMPLEX Q( LDQ, * ), T( LDT, * ) +* .. +* +* Purpose +* ======= +* +* CTREXC reorders the Schur factorization of a complex matrix +* A = Q*T*Q**H, so that the diagonal element of T with row index IFST +* is moved to row ILST. 
+* +* The Schur form T is reordered by a unitary similarity transformation +* Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by +* postmultplying it with Z. +* +* Arguments +* ========= +* +* COMPQ (input) CHARACTER*1 +* = 'V': update the matrix Q of Schur vectors; +* = 'N': do not update Q. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) COMPLEX array, dimension (LDT,N) +* On entry, the upper triangular matrix T. +* On exit, the reordered upper triangular matrix. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) COMPLEX array, dimension (LDQ,N) +* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +* On exit, if COMPQ = 'V', Q has been postmultiplied by the +* unitary transformation matrix Z which reorders T. +* If COMPQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* IFST (input) INTEGER +* ILST (input) INTEGER +* Specify the reordering of the diagonal elements of T: +* The element with row index IFST is moved to row ILST by a +* sequence of transpositions between adjacent elements. +* 1 <= IFST <= N; 1 <= ILST <= N. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ + INTEGER K, M1, M2, M3 + REAL CS + COMPLEX SN, T11, T22, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARTG, CROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + INFO = 0 + WANTQ = LSAME( COMPQ, 'V' ) + IF( .NOT.LSAME( COMPQ, 'N' ) .AND. 
.NOT.WANTQ ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN + INFO = -7 + ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTREXC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.1 .OR. IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* +* Move the IFST-th diagonal element forward down the diagonal. +* + M1 = 0 + M2 = -1 + M3 = 1 + ELSE +* +* Move the IFST-th diagonal element backward up the diagonal. +* + M1 = -1 + M2 = 0 + M3 = -1 + END IF +* + DO 10 K = IFST + M1, ILST + M2, M3 +* +* Interchange the k-th and (k+1)-th diagonal elements. +* + T11 = T( K, K ) + T22 = T( K+1, K+1 ) +* +* Determine the transformation to perform the interchange. +* + CALL CLARTG( T( K, K+1 ), T22-T11, CS, SN, TEMP ) +* +* Apply transformation to the matrix T. +* + IF( K+2.LE.N ) + $ CALL CROT( N-K-1, T( K, K+2 ), LDT, T( K+1, K+2 ), LDT, CS, + $ SN ) + CALL CROT( K-1, T( 1, K ), 1, T( 1, K+1 ), 1, CS, CONJG( SN ) ) +* + T( K, K ) = T22 + T( K+1, K+1 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL CROT( N, Q( 1, K ), 1, Q( 1, K+1 ), 1, CS, + $ CONJG( SN ) ) + END IF +* + 10 CONTINUE +* + RETURN +* +* End of CTREXC +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/ctrsen.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/ctrsen.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,359 @@ + SUBROUTINE CTRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, W, M, S, + $ SEP, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* Modified to call CLACN2 in place of CLACON, 10 Feb 03, SJH. +* +* .. Scalar Arguments .. 
+ CHARACTER COMPQ, JOB + INTEGER INFO, LDQ, LDT, LWORK, M, N + REAL S, SEP +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + COMPLEX Q( LDQ, * ), T( LDT, * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CTRSEN reorders the Schur factorization of a complex matrix +* A = Q*T*Q**H, so that a selected cluster of eigenvalues appears in +* the leading positions on the diagonal of the upper triangular matrix +* T, and the leading columns of Q form an orthonormal basis of the +* corresponding right invariant subspace. +* +* Optionally the routine computes the reciprocal condition numbers of +* the cluster of eigenvalues and/or the invariant subspace. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies whether condition numbers are required for the +* cluster of eigenvalues (S) or the invariant subspace (SEP): +* = 'N': none; +* = 'E': for eigenvalues only (S); +* = 'V': for invariant subspace only (SEP); +* = 'B': for both eigenvalues and invariant subspace (S and +* SEP). +* +* COMPQ (input) CHARACTER*1 +* = 'V': update the matrix Q of Schur vectors; +* = 'N': do not update Q. +* +* SELECT (input) LOGICAL array, dimension (N) +* SELECT specifies the eigenvalues in the selected cluster. To +* select the j-th eigenvalue, SELECT(j) must be set to .TRUE.. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) COMPLEX array, dimension (LDT,N) +* On entry, the upper triangular matrix T. +* On exit, T is overwritten by the reordered matrix T, with the +* selected eigenvalues as the leading diagonal elements. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) COMPLEX array, dimension (LDQ,N) +* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +* On exit, if COMPQ = 'V', Q has been postmultiplied by the +* unitary transformation matrix which reorders T; the leading M +* columns of Q form an orthonormal basis for the specified +* invariant subspace. 
+* If COMPQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= 1; and if COMPQ = 'V', LDQ >= N. +* +* W (output) COMPLEX array, dimension (N) +* The reordered eigenvalues of T, in the same order as they +* appear on the diagonal of T. +* +* M (output) INTEGER +* The dimension of the specified invariant subspace. +* 0 <= M <= N. +* +* S (output) REAL +* If JOB = 'E' or 'B', S is a lower bound on the reciprocal +* condition number for the selected cluster of eigenvalues. +* S cannot underestimate the true reciprocal condition number +* by more than a factor of sqrt(N). If M = 0 or N, S = 1. +* If JOB = 'N' or 'V', S is not referenced. +* +* SEP (output) REAL +* If JOB = 'V' or 'B', SEP is the estimated reciprocal +* condition number of the specified invariant subspace. If +* M = 0 or N, SEP = norm(T). +* If JOB = 'N' or 'E', SEP is not referenced. +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If JOB = 'N', LWORK >= 1; +* if JOB = 'E', LWORK = max(1,M*(N-M)); +* if JOB = 'V' or 'B', LWORK >= max(1,2*M*(N-M)). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* CTRSEN first collects the selected eigenvalues by computing a unitary +* transformation Z to move them to the top left corner of T. In other +* words, the selected eigenvalues are the eigenvalues of T11 in: +* +* Z'*T*Z = ( T11 T12 ) n1 +* ( 0 T22 ) n2 +* n1 n2 +* +* where N = n1+n2 and Z' means the conjugate transpose of Z. 
The first +* n1 columns of Z span the specified invariant subspace of T. +* +* If T has been obtained from the Schur factorization of a matrix +* A = Q*T*Q', then the reordered Schur factorization of A is given by +* A = (Q*Z)*(Z'*T*Z)*(Q*Z)', and the first n1 columns of Q*Z span the +* corresponding invariant subspace of A. +* +* The reciprocal condition number of the average of the eigenvalues of +* T11 may be returned in S. S lies between 0 (very badly conditioned) +* and 1 (very well conditioned). It is computed as follows. First we +* compute R so that +* +* P = ( I R ) n1 +* ( 0 0 ) n2 +* n1 n2 +* +* is the projector on the invariant subspace associated with T11. +* R is the solution of the Sylvester equation: +* +* T11*R - R*T22 = T12. +* +* Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote +* the two-norm of M. Then S is computed as the lower bound +* +* (1 + F-norm(R)**2)**(-1/2) +* +* on the reciprocal of 2-norm(P), the true reciprocal condition number. +* S cannot underestimate 1 / 2-norm(P) by more than a factor of +* sqrt(N). +* +* An approximate error bound for the computed average of the +* eigenvalues of T11 is +* +* EPS * norm(T) / S +* +* where EPS is the machine precision. +* +* The reciprocal condition number of the right invariant subspace +* spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP. +* SEP is defined as the separation of T11 and T22: +* +* sep( T11, T22 ) = sigma-min( C ) +* +* where sigma-min(C) is the smallest singular value of the +* n1*n2-by-n1*n2 matrix +* +* C = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) ) +* +* I(m) is an m by m identity matrix, and kprod denotes the Kronecker +* product. We estimate sigma-min(C) by the reciprocal of an estimate of +* the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C) +* cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2). +* +* When SEP is small, small changes in T can cause large changes in +* the invariant subspace. 
An approximate bound on the maximum angular +* error in the computed right invariant subspace is +* +* EPS * norm(T) / SEP +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTBH, WANTQ, WANTS, WANTSP + INTEGER IERR, K, KASE, KS, LWMIN, N1, N2, NN + REAL EST, RNORM, SCALE +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) + REAL RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGE + EXTERNAL LSAME, CLANGE +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CLACPY, CTREXC, CTRSYL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH + WANTQ = LSAME( COMPQ, 'V' ) +* +* Set M to the number of selected eigenvalues. +* + M = 0 + DO 10 K = 1, N + IF( SELECT( K ) ) + $ M = M + 1 + 10 CONTINUE +* + N1 = M + N2 = N - M + NN = N1*N2 +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( WANTSP ) THEN + LWMIN = MAX( 1, 2*NN ) + ELSE IF( LSAME( JOB, 'N' ) ) THEN + LWMIN = 1 + ELSE IF( LSAME( JOB, 'E' ) ) THEN + LWMIN = MAX( 1, NN ) + END IF +* + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.WANTS .AND. .NOT.WANTSP ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.N .OR. 
M.EQ.0 ) THEN + IF( WANTS ) + $ S = ONE + IF( WANTSP ) + $ SEP = CLANGE( '1', N, N, T, LDT, RWORK ) + GO TO 40 + END IF +* +* Collect the selected eigenvalues at the top left corner of T. +* + KS = 0 + DO 20 K = 1, N + IF( SELECT( K ) ) THEN + KS = KS + 1 +* +* Swap the K-th eigenvalue to position KS. +* + IF( K.NE.KS ) + $ CALL CTREXC( COMPQ, N, T, LDT, Q, LDQ, K, KS, IERR ) + END IF + 20 CONTINUE +* + IF( WANTS ) THEN +* +* Solve the Sylvester equation for R: +* +* T11*R - R*T22 = scale*T12 +* + CALL CLACPY( 'F', N1, N2, T( 1, N1+1 ), LDT, WORK, N1 ) + CALL CTRSYL( 'N', 'N', -1, N1, N2, T, LDT, T( N1+1, N1+1 ), + $ LDT, WORK, N1, SCALE, IERR ) +* +* Estimate the reciprocal of the condition number of the cluster +* of eigenvalues. +* + RNORM = CLANGE( 'F', N1, N2, WORK, N1, RWORK ) + IF( RNORM.EQ.ZERO ) THEN + S = ONE + ELSE + S = SCALE / ( SQRT( SCALE*SCALE / RNORM+RNORM )* + $ SQRT( RNORM ) ) + END IF + END IF +* + IF( WANTSP ) THEN +* +* Estimate sep(T11,T22). +* + EST = ZERO + KASE = 0 + 30 CONTINUE + CALL CLACN2( NN, WORK( NN+1 ), WORK, EST, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve T11*R - R*T22 = scale*X. +* + CALL CTRSYL( 'N', 'N', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + ELSE +* +* Solve T11'*R - R*T22' = scale*X. +* + CALL CTRSYL( 'C', 'C', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + END IF + GO TO 30 + END IF +* + SEP = SCALE / EST + END IF +* + 40 CONTINUE +* +* Copy reordered eigenvalues to W. +* + DO 50 K = 1, N + W( K ) = T( K, K ) + 50 CONTINUE +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CTRSEN +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/ctrsyl.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/ctrsyl.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,365 @@ + SUBROUTINE CTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, + $ LDC, SCALE, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER TRANA, TRANB + INTEGER INFO, ISGN, LDA, LDB, LDC, M, N + REAL SCALE +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CTRSYL solves the complex Sylvester matrix equation: +* +* op(A)*X + X*op(B) = scale*C or +* op(A)*X - X*op(B) = scale*C, +* +* where op(A) = A or A**H, and A and B are both upper triangular. A is +* M-by-M and B is N-by-N; the right hand side C and the solution X are +* M-by-N; and scale is an output scale factor, set <= 1 to avoid +* overflow in X. +* +* Arguments +* ========= +* +* TRANA (input) CHARACTER*1 +* Specifies the option op(A): +* = 'N': op(A) = A (No transpose) +* = 'C': op(A) = A**H (Conjugate transpose) +* +* TRANB (input) CHARACTER*1 +* Specifies the option op(B): +* = 'N': op(B) = B (No transpose) +* = 'C': op(B) = B**H (Conjugate transpose) +* +* ISGN (input) INTEGER +* Specifies the sign in the equation: +* = +1: solve op(A)*X + X*op(B) = scale*C +* = -1: solve op(A)*X - X*op(B) = scale*C +* +* M (input) INTEGER +* The order of the matrix A, and the number of rows in the +* matrices X and C. M >= 0. +* +* N (input) INTEGER +* The order of the matrix B, and the number of columns in the +* matrices X and C. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,M) +* The upper triangular matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input) COMPLEX array, dimension (LDB,N) +* The upper triangular matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N right hand side matrix C. +* On exit, C is overwritten by the solution matrix X. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M) +* +* SCALE (output) REAL +* The scale factor, scale, set <= 1 to avoid overflow in X. 
+* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1: A and B have common or very close eigenvalues; perturbed +* values were used to solve the equation (but the matrices +* A and B are unchanged). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRNA, NOTRNB + INTEGER J, K, L + REAL BIGNUM, DA11, DB, EPS, SCALOC, SGN, SMIN, + $ SMLNUM + COMPLEX A11, SUML, SUMR, VEC, X11 +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGE, SLAMCH + COMPLEX CDOTC, CDOTU, CLADIV + EXTERNAL LSAME, CLANGE, SLAMCH, CDOTC, CDOTU, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, SLABAD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Decode and Test input parameters +* + NOTRNA = LSAME( TRANA, 'N' ) + NOTRNB = LSAME( TRANB, 'N' ) +* + INFO = 0 + IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'C' ) ) THEN + INFO = -2 + ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRSYL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. 
N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM*REAL( M*N ) / EPS + BIGNUM = ONE / SMLNUM + SMIN = MAX( SMLNUM, EPS*CLANGE( 'M', M, M, A, LDA, DUM ), + $ EPS*CLANGE( 'M', N, N, B, LDB, DUM ) ) + SCALE = ONE + SGN = ISGN +* + IF( NOTRNA .AND. NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* M L-1 +* R(K,L) = SUM [A(K,I)*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]. +* I=K+1 J=1 +* + DO 30 L = 1, N + DO 20 K = M, 1, -1 +* + SUML = CDOTU( M-K, A( K, MIN( K+1, M ) ), LDA, + $ C( MIN( K+1, M ), L ), 1 ) + SUMR = CDOTU( L-1, C( K, 1 ), LDC, B( 1, L ), 1 ) + VEC = C( K, L ) - ( SUML+SGN*SUMR ) +* + SCALOC = ONE + A11 = A( K, K ) + SGN*B( L, L ) + DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 10 J = 1, N + CALL CSSCAL( M, SCALOC, C( 1, J ), 1 ) + 10 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN +* +* Solve A' *X + ISGN*X*B = scale*C. 
+* +* The (K,L)th block of X is determined starting from +* upper-left corner column by column by +* +* A'(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 L-1 +* R(K,L) = SUM [A'(I,K)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)] +* I=1 J=1 +* + DO 60 L = 1, N + DO 50 K = 1, M +* + SUML = CDOTC( K-1, A( 1, K ), 1, C( 1, L ), 1 ) + SUMR = CDOTU( L-1, C( K, 1 ), LDC, B( 1, L ), 1 ) + VEC = C( K, L ) - ( SUML+SGN*SUMR ) +* + SCALOC = ONE + A11 = CONJG( A( K, K ) ) + SGN*B( L, L ) + DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 40 J = 1, N + CALL CSSCAL( M, SCALOC, C( 1, J ), 1 ) + 40 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 50 CONTINUE + 60 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A'*X + ISGN*X*B' = C. +* +* The (K,L)th block of X is determined starting from +* upper-right corner column by column by +* +* A'(K,K)*X(K,L) + ISGN*X(K,L)*B'(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 +* R(K,L) = SUM [A'(I,K)*X(I,L)] + +* I=1 +* N +* ISGN*SUM [X(K,J)*B'(L,J)]. +* J=L+1 +* + DO 90 L = N, 1, -1 + DO 80 K = 1, M +* + SUML = CDOTC( K-1, A( 1, K ), 1, C( 1, L ), 1 ) + SUMR = CDOTC( N-L, C( K, MIN( L+1, N ) ), LDC, + $ B( L, MIN( L+1, N ) ), LDB ) + VEC = C( K, L ) - ( SUML+SGN*CONJG( SUMR ) ) +* + SCALOC = ONE + A11 = CONJG( A( K, K )+SGN*B( L, L ) ) + DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. 
DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 70 J = 1, N + CALL CSSCAL( M, SCALOC, C( 1, J ), 1 ) + 70 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B' = C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B'(L,L) = C(K,L) - R(K,L) +* +* Where +* M N +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B'(L,J)] +* I=K+1 J=L+1 +* + DO 120 L = N, 1, -1 + DO 110 K = M, 1, -1 +* + SUML = CDOTU( M-K, A( K, MIN( K+1, M ) ), LDA, + $ C( MIN( K+1, M ), L ), 1 ) + SUMR = CDOTC( N-L, C( K, MIN( L+1, N ) ), LDC, + $ B( L, MIN( L+1, N ) ), LDB ) + VEC = C( K, L ) - ( SUML+SGN*CONJG( SUMR ) ) +* + SCALOC = ONE + A11 = A( K, K ) + SGN*CONJG( B( L, L ) ) + DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 100 J = 1, N + CALL CSSCAL( M, SCALOC, C( 1, J ), 1 ) + 100 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 110 CONTINUE + 120 CONTINUE +* + END IF +* + RETURN +* +* End of CTRSYL +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/ctrti2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/ctrti2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,146 @@ + SUBROUTINE CTRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. 
+ COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CTRTI2 computes the inverse of a complex upper or lower triangular +* matrix. +* +* This is the Level 2 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the triangular matrix A. If UPLO = 'U', the +* leading n by n upper triangular part of the array A contains +* the upper triangular matrix, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of the array A contains +* the lower triangular matrix, and the strictly upper +* triangular part of A is not referenced. If DIAG = 'U', the +* diagonal elements of A are also not referenced and are +* assumed to be 1. +* +* On exit, the (triangular) inverse of the original matrix, in +* the same storage format. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J + COMPLEX AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CTRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. 
+* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRTI2', -INFO ) + RETURN + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + DO 10 J = 1, N + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL CTRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA, + $ A( 1, J ), 1 ) + CALL CSCAL( J-1, AJJ, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix. +* + DO 20 J = N, 1, -1 + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL CTRMV( 'Lower', 'No transpose', DIAG, N-J, + $ A( J+1, J+1 ), LDA, A( J+1, J ), 1 ) + CALL CSCAL( N-J, AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of CTRTI2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/ctrtri.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/ctrtri.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,177 @@ + SUBROUTINE CTRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CTRTRI computes the inverse of a complex upper or lower triangular +* matrix A. +* +* This is the Level 3 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. 
+* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the triangular matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of the array A contains +* the upper triangular matrix, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of the array A contains +* the lower triangular matrix, and the strictly upper +* triangular part of A is not referenced. If DIAG = 'U', the +* diagonal elements of A are also not referenced and are +* assumed to be 1. +* On exit, the (triangular) inverse of the original matrix, in +* the same storage format. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, A(i,i) is exactly zero. The triangular +* matrix is singular and its inverse can not be computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JB, NB, NN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CTRMM, CTRSM, CTRTI2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. 
.NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + INFO = 0 + END IF +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CTRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL CTRTI2( UPLO, DIAG, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix +* + DO 20 J = 1, N, NB + JB = MIN( NB, N-J+1 ) +* +* Compute rows 1:j-1 of current block column +* + CALL CTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1, + $ JB, ONE, A, LDA, A( 1, J ), LDA ) + CALL CTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1, + $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA ) +* +* Compute inverse of current diagonal block +* + CALL CTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO ) + 20 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 30 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) + IF( J+JB.LE.N ) THEN +* +* Compute rows j+jb:n of current block column +* + CALL CTRMM( 'Left', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA, + $ A( J+JB, J ), LDA ) + CALL CTRSM( 'Right', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, -ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF +* +* Compute inverse of current diagonal block +* + CALL CTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO ) + 30 CONTINUE + END IF + END IF +* + RETURN +* +* End of CTRTRI +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/ctrtrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/ctrtrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 
+1,148 @@ + SUBROUTINE CTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CTRTRS solves a triangular system of the form +* +* A * X = B, A**T * X = B, or A**H * X = B, +* +* where A is a triangular matrix of order N, and B is an N-by-NRHS +* matrix. A check is made to verify that A is nonsingular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. 
+* On exit, if INFO = 0, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element of A is zero, +* indicating that the matrix is singular and the solutions +* X have not been computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + END IF + INFO = 0 +* +* Solve A * x = b, A**T * x = b, or A**H * x = b. 
+* + CALL CTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, LDA, B, + $ LDB ) +* + RETURN +* +* End of CTRTRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/ctzrzf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/ctzrzf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,246 @@ + SUBROUTINE CTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CTZRZF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A +* to upper triangular form by means of unitary transformations. +* +* The upper trapezoidal matrix A is factored as +* +* A = ( R 0 ) * Z, +* +* where Z is an N-by-N unitary matrix and R is an M-by-M upper +* triangular matrix. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= M. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements M+1 to +* N of the first M rows of A, with the array TAU, represent the +* unitary matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). 
+* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), which is used to introduce zeros into +* the ( m - k + 1 )th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an ( n - m ) element vector. +* tau and z( k ) are chosen to annihilate the elements of the kth row +* of X. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A, such that the elements of z( k ) are +* in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IWS, KI, KK, LDWORK, LWKOPT, M1, MU, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CLARZB, CLARZT, CLATRZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( M.EQ.0 .OR. M.EQ.N ) THEN + LWKOPT = 1 + ELSE +* +* Determine the block size. +* + NB = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTZRZF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.M ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.M ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.M .AND. NX.LT.M ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + M1 = MIN( M+1, N ) + KI = ( ( M-NX-1 ) / NB )*NB + KK = MIN( M, KI+NB ) +* + DO 20 I = M - KK + KI + 1, M - KK + 1, -NB + IB = MIN( M-I+1, NB ) +* +* Compute the TZ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL CLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ), + $ WORK ) + IF( I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . 
H(i+1) H(i) +* + CALL CLARZT( 'Backward', 'Rowwise', N-M, IB, A( I, M1 ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:i-1,i:n) from the right +* + CALL CLARZB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ), + $ LDA, WORK, LDWORK, A( 1, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 20 CONTINUE + MU = I + NB - 1 + ELSE + MU = M + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 ) + $ CALL CLATRZ( MU, N, N-M, A, LDA, TAU, WORK ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CTZRZF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cung2l.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cung2l.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,128 @@ + SUBROUTINE CUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNG2L generates an m by n complex matrix Q with orthonormal columns, +* which is defined as the last n columns of a product of k elementary +* reflectors of order m +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by CGEQLF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the (n-k+i)-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by CGEQLF in the last k columns of its array +* argument A. +* On exit, the m-by-n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. 
LDA >= max(1,M). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEQLF. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNG2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns 1:n-k to columns of the unit matrix +* + DO 20 J = 1, N - K + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( M-N+J, J ) = ONE + 20 CONTINUE +* + DO 40 I = 1, K + II = N - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left +* + A( M-N+II, II ) = ONE + CALL CLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, + $ LDA, WORK ) + CALL CSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) + A( M-N+II, II ) = ONE - TAU( I ) +* +* Set A(m-k+i+1:m,n-k+i) to zero +* + DO 30 L = M - N + II + 1, M + A( L, II ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of CUNG2L +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cung2r.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cung2r.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,130 @@ + SUBROUTINE CUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. 
of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNG2R generates an m by n complex matrix Q with orthonormal columns, +* which is defined as the first n columns of a product of k elementary +* reflectors of order m +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by CGEQRF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the i-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by CGEQRF in the first k columns of its array +* argument A. +* On exit, the m by n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEQRF. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. 
K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNG2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns k+1:n to columns of the unit matrix +* + DO 20 J = K + 1, N + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( J, J ) = ONE + 20 CONTINUE +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the left +* + IF( I.LT.N ) THEN + A( I, I ) = ONE + CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + END IF + IF( I.LT.M ) + $ CALL CSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) + A( I, I ) = ONE - TAU( I ) +* +* Set A(1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( L, I ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of CUNG2R +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cungbr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cungbr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,245 @@ + SUBROUTINE CUNGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGBR generates one of the complex unitary matrices Q or P**H +* determined by CGEBRD when reducing a complex matrix A to bidiagonal +* form: A = Q * B * P**H. Q and P**H are defined as products of +* elementary reflectors H(i) or G(i) respectively. +* +* If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q +* is of order M: +* if m >= k, Q = H(1) H(2) . . . H(k) and CUNGBR returns the first n +* columns of Q, where m >= n >= k; +* if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an +* M-by-M matrix. 
+* +* If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H +* is of order N: +* if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m +* rows of P**H, where n >= m >= k; +* if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as +* an N-by-N matrix. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* Specifies whether the matrix Q or the matrix P**H is +* required, as defined in the transformation applied by CGEBRD: +* = 'Q': generate Q; +* = 'P': generate P**H. +* +* M (input) INTEGER +* The number of rows of the matrix Q or P**H to be returned. +* M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q or P**H to be returned. +* N >= 0. +* If VECT = 'Q', M >= N >= min(M,K); +* if VECT = 'P', N >= M >= min(N,K). +* +* K (input) INTEGER +* If VECT = 'Q', the number of columns in the original M-by-K +* matrix reduced by CGEBRD. +* If VECT = 'P', the number of rows in the original K-by-N +* matrix reduced by CGEBRD. +* K >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by CGEBRD. +* On exit, the M-by-N matrix Q or P**H. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= M. +* +* TAU (input) COMPLEX array, dimension +* (min(M,K)) if VECT = 'Q' +* (min(N,K)) if VECT = 'P' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i) or G(i), which determines Q or P**H, as +* returned by CGEBRD in its array argument TAUQ or TAUP. +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,min(M,N)). +* For optimum performance LWORK >= min(M,N)*NB, where NB +* is the optimal blocksize. 
+* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ + INTEGER I, IINFO, J, LWKOPT, MN, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNGLQ, CUNGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTQ = LSAME( VECT, 'Q' ) + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M, + $ K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT. + $ MIN( N, K ) ) ) ) THEN + INFO = -3 + ELSE IF( K.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( WANTQ ) THEN + NB = ILAENV( 1, 'CUNGQR', ' ', M, N, K, -1 ) + ELSE + NB = ILAENV( 1, 'CUNGLQ', ' ', M, N, K, -1 ) + END IF + LWKOPT = MAX( 1, MN )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( WANTQ ) THEN +* +* Form Q, determined by a call to CGEBRD to reduce an m-by-k +* matrix +* + IF( M.GE.K ) THEN +* +* If m >= k, assume m >= n >= k +* + CALL CUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If m < k, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q +* to those of the unit matrix +* + DO 20 J = M, 2, -1 + A( 1, J ) = ZERO + DO 10 I = J + 1, M + A( I, J ) = A( I, J-1 ) + 10 CONTINUE + 20 CONTINUE + A( 1, 1 ) = ONE + DO 30 I = 2, M + A( I, 1 ) = ZERO + 30 CONTINUE + IF( M.GT.1 ) THEN +* +* Form Q(2:m,2:m) +* + CALL CUNGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + ELSE +* +* Form P', determined by a call to CGEBRD to reduce a k-by-n +* matrix +* + IF( K.LT.N ) THEN +* +* If k < n, assume k <= m <= n +* + CALL CUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If k >= n, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* row downward, and set the first row and column of P' to +* those of the unit matrix +* + A( 1, 1 ) = ONE + DO 40 I = 2, N + A( I, 1 ) = ZERO + 40 CONTINUE + DO 60 J = 2, N + DO 50 I = J - 1, 2, -1 + A( I, J ) = A( I-1, J ) + 50 CONTINUE + A( 1, J ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Form P'(2:n,2:n) +* + CALL CUNGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNGBR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cunghr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cunghr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,165 @@ + SUBROUTINE CUNGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. 
+ INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGHR generates a complex unitary matrix Q which is defined as the +* product of IHI-ILO elementary reflectors of order N, as returned by +* CGEHRD: +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* ILO and IHI must have the same values as in the previous call +* of CGEHRD. Q is equal to the unit matrix except in the +* submatrix Q(ilo+1:ihi,ilo+1:ihi). +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by CGEHRD. +* On exit, the N-by-N unitary matrix Q. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (input) COMPLEX array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEHRD. +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= IHI-ILO. +* For optimum performance LWORK >= (IHI-ILO)*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. 
Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LWKOPT, NB, NH +* .. +* .. External Subroutines .. + EXTERNAL CUNGQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, NH ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CUNGQR', ' ', NH, NH, NH, -1 ) + LWKOPT = MAX( 1, NH )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first ilo and the last n-ihi +* rows and columns to those of the unit matrix +* + DO 40 J = IHI, ILO + 1, -1 + DO 10 I = 1, J - 1 + A( I, J ) = ZERO + 10 CONTINUE + DO 20 I = J + 1, IHI + A( I, J ) = A( I, J-1 ) + 20 CONTINUE + DO 30 I = IHI + 1, N + A( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + DO 60 J = 1, ILO + DO 50 I = 1, N + A( I, J ) = ZERO + 50 CONTINUE + A( J, J ) = ONE + 60 CONTINUE + DO 80 J = IHI + 1, N + DO 70 I = 1, N + A( I, J ) = ZERO + 70 CONTINUE + A( J, J ) = ONE + 80 CONTINUE +* + IF( NH.GT.0 ) THEN +* +* Generate Q(ilo+1:ihi,ilo+1:ihi) +* + CALL CUNGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ), + $ WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNGHR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cungl2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cungl2.f Sun Apr 27 22:34:17 2008 +0200 
@@ -0,0 +1,136 @@ + SUBROUTINE CUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, +* which is defined as the first m rows of a product of k elementary +* reflectors of order n +* +* Q = H(k)' . . . H(2)' H(1)' +* +* as returned by CGELQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the i-th row must contain the vector which defines +* the elementary reflector H(i), for i = 1,2,...,k, as returned +* by CGELQF in the first k rows of its array argument A. +* On exit, the m by n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGELQF. +* +* WORK (workspace) COMPLEX array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGL2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows k+1:m to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = K + 1, M + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.K .AND. J.LE.M ) + $ A( J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = K, 1, -1 +* +* Apply H(i)' to A(i:m,i:n) from the right +* + IF( I.LT.N ) THEN + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + IF( I.LT.M ) THEN + A( I, I ) = ONE + CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ CONJG( TAU( I ) ), A( I+1, I ), LDA, WORK ) + END IF + CALL CSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + END IF + A( I, I ) = ONE - CONJG( TAU( I ) ) +* +* Set A(i,1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( I, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of CUNGL2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cunglq.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cunglq.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,215 @@ + SUBROUTINE CUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGLQ generates an M-by-N complex matrix Q with orthonormal rows, +* which is defined as the first M rows of a product of K elementary +* reflectors of order N +* +* Q = H(k)' . . . H(2)' H(1)' +* +* as returned by CGELQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. 
M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the i-th row must contain the vector which defines +* the elementary reflector H(i), for i = 1,2,...,k, as returned +* by CGELQF in the first k rows of its array argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGELQF. +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit; +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNGL2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CUNGLQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CUNGLQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNGLQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(kk+1:m,1:kk) to zero. +* + DO 20 J = 1, KK + DO 10 I = KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.M ) + $ CALL CUNGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . 
H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(i+ib:m,i:n) from the right +* + CALL CLARFB( 'Right', 'Conjugate transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H' to columns i:n of current block +* + CALL CUNGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set columns 1:i-1 of current block to zero +* + DO 40 J = 1, I - 1 + DO 30 L = I, I + IB - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of CUNGLQ +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cungql.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cungql.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,222 @@ + SUBROUTINE CUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGQL generates an M-by-N complex matrix Q with orthonormal columns, +* which is defined as the last N columns of a product of K elementary +* reflectors of order M +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by CGEQLF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. 
+* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the (n-k+i)-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by CGEQLF in the last k columns of its array +* argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEQLF. +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT, + $ NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNG2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. 
K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'CUNGQL', ' ', M, N, K, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CUNGQL', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNGQL', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk columns are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(m-kk+1:m,1:n-kk) to zero. +* + DO 20 J = 1, N - KK + DO 10 I = M - KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL CUNG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . 
H(i+1) H(i) +* + CALL CLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL CLARFB( 'Left', 'No transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows 1:m-k+i+ib-1 of current block +* + CALL CUNG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA, + $ TAU( I ), WORK, IINFO ) +* +* Set rows m-k+i+ib:m of current block to zero +* + DO 40 J = N - K + I, N - K + I + IB - 1 + DO 30 L = M - K + I + IB, M + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of CUNGQL +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cungqr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cungqr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,216 @@ + SUBROUTINE CUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGQR generates an M-by-N complex matrix Q with orthonormal columns, +* which is defined as the first N columns of a product of K elementary +* reflectors of order M +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by CGEQRF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. 
+* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the i-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by CGEQRF in the first k columns of its array +* argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEQRF. +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CUNGQR', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. 
K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CUNGQR', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNGQR', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(1:kk,kk+1:n) to zero. +* + DO 20 J = KK + 1, N + DO 10 I = 1, KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.N ) + $ CALL CUNG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . 
H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i:m,i+ib:n) from the left +* + CALL CLARFB( 'Left', 'No transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows i:m of current block +* + CALL CUNG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set rows 1:i-1 of current block to zero +* + DO 40 J = I, I + IB - 1 + DO 30 L = 1, I - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of CUNGQR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cungtr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cungtr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,184 @@ + SUBROUTINE CUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGTR generates a complex unitary matrix Q which is defined as the +* product of n-1 elementary reflectors of order N, as returned by +* CHETRD: +* +* if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A contains elementary reflectors +* from CHETRD; +* = 'L': Lower triangle of A contains elementary reflectors +* from CHETRD. +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by CHETRD. +* On exit, the N-by-N unitary matrix Q. 
+* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= N. +* +* TAU (input) COMPLEX array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CHETRD. +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= N-1. +* For optimum performance LWORK >= (N-1)*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, J, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNGQL, CUNGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. 
.NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF ( UPPER ) THEN + NB = ILAENV( 1, 'CUNGQL', ' ', N-1, N-1, N-1, -1 ) + ELSE + NB = ILAENV( 1, 'CUNGQR', ' ', N-1, N-1, N-1, -1 ) + END IF + LWKOPT = MAX( 1, N-1 )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to CHETRD with UPLO = 'U' +* +* Shift the vectors which define the elementary reflectors one +* column to the left, and set the last row and column of Q to +* those of the unit matrix +* + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + A( I, J ) = A( I, J+1 ) + 10 CONTINUE + A( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + A( I, N ) = ZERO + 30 CONTINUE + A( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL CUNGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* Q was determined by a call to CHETRD with UPLO = 'L'. +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q to +* those of the unit matrix +* + DO 50 J = N, 2, -1 + A( 1, J ) = ZERO + DO 40 I = J + 1, N + A( I, J ) = A( I, J-1 ) + 40 CONTINUE + 50 CONTINUE + A( 1, 1 ) = ONE + DO 60 I = 2, N + A( I, 1 ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL CUNGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNGTR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cunm2r.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cunm2r.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,201 @@ + SUBROUTINE CUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. 
+* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNM2R overwrites the general complex m-by-n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'C', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'C', +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by CGEQRF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q' (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CGEQRF in the first k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEQRF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. 
+* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + COMPLEX AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNM2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. 
NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = CONJG( TAU( I ) ) + END IF + AII = A( I, I ) + A( I, I ) = ONE + CALL CLARF( SIDE, MI, NI, A( I, I ), 1, TAUI, C( IC, JC ), LDC, + $ WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of CUNM2R +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cunmbr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cunmbr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,289 @@ + SUBROUTINE CUNMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, VECT + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* If VECT = 'Q', CUNMBR overwrites the general complex M-by-N matrix C +* with +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* If VECT = 'P', CUNMBR overwrites the general complex M-by-N matrix C +* with +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': P * C C * P +* TRANS = 'C': P**H * C C * P**H +* +* Here Q and P**H are the unitary matrices determined by CGEBRD when +* reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q +* and P**H are defined as products of elementary reflectors H(i) and +* G(i) respectively. +* +* Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the +* order of the unitary matrix Q or P**H that is applied. 
+* +* If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: +* if nq >= k, Q = H(1) H(2) . . . H(k); +* if nq < k, Q = H(1) H(2) . . . H(nq-1). +* +* If VECT = 'P', A is assumed to have been a K-by-NQ matrix: +* if k < nq, P = G(1) G(2) . . . G(k); +* if k >= nq, P = G(1) G(2) . . . G(nq-1). +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* = 'Q': apply Q or Q**H; +* = 'P': apply P or P**H. +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q, Q**H, P or P**H from the Left; +* = 'R': apply Q, Q**H, P or P**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q or P; +* = 'C': Conjugate transpose, apply Q**H or P**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* If VECT = 'Q', the number of columns in the original +* matrix reduced by CGEBRD. +* If VECT = 'P', the number of rows in the original +* matrix reduced by CGEBRD. +* K >= 0. +* +* A (input) COMPLEX array, dimension +* (LDA,min(nq,K)) if VECT = 'Q' +* (LDA,nq) if VECT = 'P' +* The vectors which define the elementary reflectors H(i) and +* G(i), whose products determine the matrices Q and P, as +* returned by CGEBRD. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If VECT = 'Q', LDA >= max(1,nq); +* if VECT = 'P', LDA >= max(1,min(nq,K)). +* +* TAU (input) COMPLEX array, dimension (min(nq,K)) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i) or G(i) which determines Q or P, as returned +* by CGEBRD in the array argument TAUQ or TAUP. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q +* or P*C or P**H*C or C*P or C*P**H. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). 
+* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M); +* if N = 0 or M = 0, LWORK >= 1. +* For optimum performance LWORK >= max(1,N*NB) if SIDE = 'L', +* and LWORK >= max(1,M*NB) if SIDE = 'R', where NB is the +* optimal blocksize. (NB = 0 if M = 0 or N = 0.) +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNMLQ, CUNMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + APPLYQ = LSAME( VECT, 'Q' ) + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q or P and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + NW = 0 + END IF + IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. 
.NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( K.LT.0 ) THEN + INFO = -6 + ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR. + $ ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) ) + $ THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( NW.GT.0 ) THEN + IF( APPLYQ ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW*NB ) + ELSE + LWKOPT = 1 + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. 
N.EQ.0 ) + $ RETURN +* + IF( APPLYQ ) THEN +* +* Apply Q +* + IF( NQ.GE.K ) THEN +* +* Q was determined by a call to CGEBRD with nq >= k +* + CALL CUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* Q was determined by a call to CGEBRD with nq < k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL CUNMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + ELSE +* +* Apply P +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF + IF( NQ.GT.K ) THEN +* +* P was determined by a call to CGEBRD with nq > k +* + CALL CUNMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* P was determined by a call to CGEBRD with nq <= k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL CUNMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA, + $ TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMBR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cunml2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cunml2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,205 @@ + SUBROUTINE CUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. 
+* +* Purpose +* ======= +* +* CUNML2 overwrites the general complex m-by-n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'C', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'C', +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(k)' . . . H(2)' H(1)' +* +* as returned by CGELQF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q' (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CGELQF in the first k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGELQF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). 
+* +* WORK (workspace) COMPLEX array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + COMPLEX AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNML2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. 
.NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = CONJG( TAU( I ) ) + ELSE + TAUI = TAU( I ) + END IF + IF( I.LT.NQ ) + $ CALL CLACGV( NQ-I, A( I, I+1 ), LDA ) + AII = A( I, I ) + A( I, I ) = ONE + CALL CLARF( SIDE, MI, NI, A( I, I ), LDA, TAUI, C( IC, JC ), + $ LDC, WORK ) + A( I, I ) = AII + IF( I.LT.NQ ) + $ CALL CLACGV( NQ-I, A( I, I+1 ), LDA ) + 10 CONTINUE + RETURN +* +* End of CUNML2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cunmlq.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cunmlq.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,268 @@ + SUBROUTINE CUNMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNMLQ overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(k)' . . . H(2)' H(1)' +* +* as returned by CGELQF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. 
+* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Conjugate transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CGELQF in the first k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGELQF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. 
+* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + COMPLEX T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNML2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. 
K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL CUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL CLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, + $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMLQ +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cunmqr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cunmqr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,261 @@ + SUBROUTINE CUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. 
+* +* Purpose +* ======= +* +* CUNMQR overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Conjugate transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CGEQRF in the first k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEQRF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. 
+* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + COMPLEX T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNM2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. 
NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL CUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . 
H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL CLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, + $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMQR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cunmr3.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cunmr3.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,212 @@ + SUBROUTINE CUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNMR3 overwrites the general complex m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'C', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'C', +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by CTZRZF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q' (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. 
+* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing +* the meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* A (input) COMPLEX array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CTZRZF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CTZRZF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JA, JC, MI, NI, NQ + COMPLEX TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMR3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JA = M - L + 1 + JC = 1 + ELSE + MI = M + JA = N - L + 1 + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = CONJG( TAU( I ) ) + END IF + CALL CLARZ( SIDE, MI, NI, L, A( I, JA ), LDA, TAUI, + $ C( IC, JC ), LDC, WORK ) +* + 10 CONTINUE +* + RETURN +* +* End of CUNMR3 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/cunmrz.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/cunmrz.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,297 @@ + SUBROUTINE CUNMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* January 2007 +* +* .. Scalar Arguments .. 
+ CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNMRZ overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by CTZRZF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Conjugate transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing +* the meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* A (input) COMPLEX array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CTZRZF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CTZRZF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. 
+* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC, + $ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + COMPLEX T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLARZB, CLARZT, CUNMR3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE +* +* Determine the block size. NB may be at most NBMAX, where +* NBMAX is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMRZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL CUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. 
.NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + JA = M - L + 1 + ELSE + MI = M + IC = 1 + JA = N - L + 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA, + $ TAU( I ), T, LDT ) +* + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL CLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ), + $ LDC, WORK, LDWORK ) + 10 CONTINUE +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CUNMRZ +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sbdsqr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sbdsqr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,742 @@ + SUBROUTINE SBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, + $ LDU, C, LDC, WORK, INFO ) +* +* -- LAPACK routine (version 3.1.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* January 2007 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), D( * ), E( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SBDSQR computes the singular values and, optionally, the right and/or +* left singular vectors from the singular value decomposition (SVD) of +* a real N-by-N (upper or lower) bidiagonal matrix B using the implicit +* zero-shift QR algorithm. 
The SVD of B has the form +* +* B = Q * S * P**T +* +* where S is the diagonal matrix of singular values, Q is an orthogonal +* matrix of left singular vectors, and P is an orthogonal matrix of +* right singular vectors. If left singular vectors are requested, this +* subroutine actually returns U*Q instead of Q, and, if right singular +* vectors are requested, this subroutine returns P**T*VT instead of +* P**T, for given real input matrices U and VT. When U and VT are the +* orthogonal matrices that reduce a general matrix A to bidiagonal +* form: A = U*B*VT, as computed by SGEBRD, then +* +* A = (U*Q) * S * (P**T*VT) +* +* is the SVD of A. Optionally, the subroutine may also compute Q**T*C +* for a given real input matrix C. +* +* See "Computing Small Singular Values of Bidiagonal Matrices With +* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +* LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, +* no. 5, pp. 873-912, Sept 1990) and +* "Accurate singular values and differential qd algorithms," by +* B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics +* Department, University of California at Berkeley, July 1992 +* for a detailed description of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': B is upper bidiagonal; +* = 'L': B is lower bidiagonal. +* +* N (input) INTEGER +* The order of the matrix B. N >= 0. +* +* NCVT (input) INTEGER +* The number of columns of the matrix VT. NCVT >= 0. +* +* NRU (input) INTEGER +* The number of rows of the matrix U. NRU >= 0. +* +* NCC (input) INTEGER +* The number of columns of the matrix C. NCC >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the bidiagonal matrix B. +* On exit, if INFO=0, the singular values of B in decreasing +* order. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the N-1 offdiagonal elements of the bidiagonal +* matrix B. 
+* On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E +* will contain the diagonal and superdiagonal elements of a +* bidiagonal matrix orthogonally equivalent to the one given +* as input. +* +* VT (input/output) REAL array, dimension (LDVT, NCVT) +* On entry, an N-by-NCVT matrix VT. +* On exit, VT is overwritten by P**T * VT. +* Not referenced if NCVT = 0. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. +* LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. +* +* U (input/output) REAL array, dimension (LDU, N) +* On entry, an NRU-by-N matrix U. +* On exit, U is overwritten by U * Q. +* Not referenced if NRU = 0. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,NRU). +* +* C (input/output) REAL array, dimension (LDC, NCC) +* On entry, an N-by-NCC matrix C. +* On exit, C is overwritten by Q**T * C. +* Not referenced if NCC = 0. +* +* LDC (input) INTEGER +* The leading dimension of the array C. +* LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. +* +* WORK (workspace) REAL array, dimension (2*N) +* if NCVT = NRU = NCC = 0, (max(1, 4*N)) otherwise +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: If INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm did not converge; D and E contain the +* elements of a bidiagonal matrix which is orthogonally +* similar to the input matrix B; if INFO = i, i +* elements of E have not converged to zero. +* +* Internal Parameters +* =================== +* +* TOLMUL REAL, default = max(10,min(100,EPS**(-1/8))) +* TOLMUL controls the convergence criterion of the QR loop. +* If it is positive, TOLMUL*EPS is the desired relative +* precision in the computed singular values. +* If it is negative, abs(TOLMUL*EPS*sigma_max) is the +* desired absolute accuracy in the computed singular +* values (corresponds to relative accuracy +* abs(TOLMUL*EPS) in the largest singular value. 
+* abs(TOLMUL) should be between 1 and 1/EPS, and preferably +* between 10 (for fast convergence) and .1/EPS +* (for there to be some accuracy in the results). +* Default is to lose at either one eighth or 2 of the +* available decimal digits in each computed singular value +* (whichever is smaller). +* +* MAXITR INTEGER, default = 6 +* MAXITR controls the maximum number of passes of the +* algorithm through its inner loop. The algorithms stops +* (and so fails to converge) if the number of passes +* through the inner loop exceeds MAXITR*N**2. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL NEGONE + PARAMETER ( NEGONE = -1.0E0 ) + REAL HNDRTH + PARAMETER ( HNDRTH = 0.01E0 ) + REAL TEN + PARAMETER ( TEN = 10.0E0 ) + REAL HNDRD + PARAMETER ( HNDRD = 100.0E0 ) + REAL MEIGTH + PARAMETER ( MEIGTH = -0.125E0 ) + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, ROTATE + INTEGER I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1, + $ NM12, NM13, OLDLL, OLDM + REAL ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU, + $ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL, + $ SINR, SLL, SMAX, SMIN, SMINL, SMINOA, + $ SN, THRESH, TOL, TOLMUL, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLARTG, SLAS2, SLASQ1, SLASR, SLASV2, SROT, + $ SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -3 + ELSE IF( NRU.LT.0 ) THEN + INFO = -4 + ELSE IF( NCC.LT.0 ) THEN + INFO = -5 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. 
LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -11 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SBDSQR', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) + $ GO TO 160 +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) +* +* If no singular vectors desired, use qd algorithm +* + IF( .NOT.ROTATE ) THEN + CALL SLASQ1( N, D, E, WORK, INFO ) + RETURN + END IF +* + NM1 = N - 1 + NM12 = NM1 + NM1 + NM13 = NM12 + NM1 + IDIR = 0 +* +* Get machine constants +* + EPS = SLAMCH( 'Epsilon' ) + UNFL = SLAMCH( 'Safe minimum' ) +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + IF( LOWER ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + WORK( I ) = CS + WORK( NM1+I ) = SN + 10 CONTINUE +* +* Update singular vectors if desired +* + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), WORK( N ), U, + $ LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), WORK( N ), C, + $ LDC ) + END IF +* +* Compute singular values to relative accuracy TOL +* (By setting TOL to be negative, algorithm will compute +* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) +* + TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) ) + TOL = TOLMUL*EPS +* +* Compute approximate maximum, minimum singular values +* + SMAX = ZERO + DO 20 I = 1, N + SMAX = MAX( SMAX, ABS( D( I ) ) ) + 20 CONTINUE + DO 30 I = 1, N - 1 + SMAX = MAX( SMAX, ABS( E( I ) ) ) + 30 CONTINUE + SMINL = ZERO + IF( TOL.GE.ZERO ) THEN +* +* Relative accuracy desired +* + SMINOA = ABS( D( 1 ) ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + MU = SMINOA + DO 40 I = 2, N + MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) ) + 
SMINOA = MIN( SMINOA, MU ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + SMINOA = SMINOA / SQRT( REAL( N ) ) + THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL ) + ELSE +* +* Absolute accuracy desired +* + THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL ) + END IF +* +* Prepare for main iteration loop for the singular values +* (MAXIT is the maximum number of passes through the inner +* loop permitted before nonconvergence signalled.) +* + MAXIT = MAXITR*N*N + ITER = 0 + OLDLL = -1 + OLDM = -1 +* +* M points to last element of unconverged part of matrix +* + M = N +* +* Begin main iteration loop +* + 60 CONTINUE +* +* Check for convergence or exceeding iteration count +* + IF( M.LE.1 ) + $ GO TO 160 + IF( ITER.GT.MAXIT ) + $ GO TO 200 +* +* Find diagonal block of matrix to work on +* + IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH ) + $ D( M ) = ZERO + SMAX = ABS( D( M ) ) + SMIN = SMAX + DO 70 LLL = 1, M - 1 + LL = M - LLL + ABSS = ABS( D( LL ) ) + ABSE = ABS( E( LL ) ) + IF( TOL.LT.ZERO .AND. 
ABSS.LE.THRESH ) + $ D( LL ) = ZERO + IF( ABSE.LE.THRESH ) + $ GO TO 80 + SMIN = MIN( SMIN, ABSS ) + SMAX = MAX( SMAX, ABSS, ABSE ) + 70 CONTINUE + LL = 0 + GO TO 90 + 80 CONTINUE + E( LL ) = ZERO +* +* Matrix splits since E(LL) = 0 +* + IF( LL.EQ.M-1 ) THEN +* +* Convergence of bottom singular value, return to top of loop +* + M = M - 1 + GO TO 60 + END IF + 90 CONTINUE + LL = LL + 1 +* +* E(LL) through E(M-1) are nonzero, E(LL-1) is zero +* + IF( LL.EQ.M-1 ) THEN +* +* 2 by 2 block, handle separately +* + CALL SLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR, + $ COSR, SINL, COSL ) + D( M-1 ) = SIGMX + E( M-1 ) = ZERO + D( M ) = SIGMN +* +* Compute singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL SROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, COSR, + $ SINR ) + IF( NRU.GT.0 ) + $ CALL SROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL ) + IF( NCC.GT.0 ) + $ CALL SROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL, + $ SINL ) + M = M - 2 + GO TO 60 + END IF +* +* If working on new submatrix, choose shift direction +* (from larger end diagonal element towards smaller) +* + IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN + IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN +* +* Chase bulge from top (big end) to bottom (small end) +* + IDIR = 1 + ELSE +* +* Chase bulge from bottom (big end) to top (small end) +* + IDIR = 2 + END IF + END IF +* +* Apply convergence tests +* + IF( IDIR.EQ.1 ) THEN +* +* Run convergence test in forward direction +* First apply standard test to bottom of matrix +* + IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR. + $ ( TOL.LT.ZERO .AND. 
ABS( E( M-1 ) ).LE.THRESH ) ) THEN + E( M-1 ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion forward +* + MU = ABS( D( LL ) ) + SMINL = MU + DO 100 LLL = LL, M - 1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 100 CONTINUE + END IF +* + ELSE +* +* Run convergence test in backward direction +* First apply standard test to top of matrix +* + IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN + E( LL ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion backward +* + MU = ABS( D( M ) ) + SMINL = MU + DO 110 LLL = M - 1, LL, -1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 110 CONTINUE + END IF + END IF + OLDLL = LL + OLDM = M +* +* Compute shift. First, test if shifting would ruin relative +* accuracy, and if so set the shift to zero. +* + IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE. 
+ $ MAX( EPS, HNDRTH*TOL ) ) THEN +* +* Use a zero shift to avoid loss of relative accuracy +* + SHIFT = ZERO + ELSE +* +* Compute the shift from 2-by-2 block at end of matrix +* + IF( IDIR.EQ.1 ) THEN + SLL = ABS( D( LL ) ) + CALL SLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R ) + ELSE + SLL = ABS( D( M ) ) + CALL SLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R ) + END IF +* +* Test if shift negligible, and if so set to zero +* + IF( SLL.GT.ZERO ) THEN + IF( ( SHIFT / SLL )**2.LT.EPS ) + $ SHIFT = ZERO + END IF + END IF +* +* Increment iteration count +* + ITER = ITER + M - LL +* +* If SHIFT = 0, do simplified QR iteration +* + IF( SHIFT.EQ.ZERO ) THEN + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 120 I = LL, M - 1 + CALL SLARTG( D( I )*CS, E( I ), CS, SN, R ) + IF( I.GT.LL ) + $ E( I-1 ) = OLDSN*R + CALL SLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) ) + WORK( I-LL+1 ) = CS + WORK( I-LL+1+NM1 ) = SN + WORK( I-LL+1+NM12 ) = OLDCS + WORK( I-LL+1+NM13 ) = OLDSN + 120 CONTINUE + H = D( M )*CS + D( M ) = H*OLDCS + E( M-1 ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ), + $ WORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ), + $ WORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ), + $ WORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 130 I = M, LL + 1, -1 + CALL SLARTG( D( I )*CS, E( I-1 ), CS, SN, R ) + IF( I.LT.M ) + $ E( I ) = OLDSN*R + CALL SLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) ) + WORK( I-LL ) = CS + WORK( I-LL+NM1 ) = -SN + WORK( I-LL+NM12 ) = OLDCS + WORK( I-LL+NM13 
) = -OLDSN + 130 CONTINUE + H = D( LL )*CS + D( LL ) = H*OLDCS + E( LL ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ), + $ WORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ), + $ WORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ), + $ WORK( N ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO + END IF + ELSE +* +* Use nonzero shift +* + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( LL ) )-SHIFT )* + $ ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) ) + G = E( LL ) + DO 140 I = LL, M - 1 + CALL SLARTG( F, G, COSR, SINR, R ) + IF( I.GT.LL ) + $ E( I-1 ) = R + F = COSR*D( I ) + SINR*E( I ) + E( I ) = COSR*E( I ) - SINR*D( I ) + G = SINR*D( I+1 ) + D( I+1 ) = COSR*D( I+1 ) + CALL SLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I ) + SINL*D( I+1 ) + D( I+1 ) = COSL*D( I+1 ) - SINL*E( I ) + IF( I.LT.M-1 ) THEN + G = SINL*E( I+1 ) + E( I+1 ) = COSL*E( I+1 ) + END IF + WORK( I-LL+1 ) = COSR + WORK( I-LL+1+NM1 ) = SINR + WORK( I-LL+1+NM12 ) = COSL + WORK( I-LL+1+NM13 ) = SINL + 140 CONTINUE + E( M-1 ) = F +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ), + $ WORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ), + $ WORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ), + $ WORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT / + $ D( M ) ) + G = E( M-1 ) + DO 150 I = M, 
LL + 1, -1 + CALL SLARTG( F, G, COSR, SINR, R ) + IF( I.LT.M ) + $ E( I ) = R + F = COSR*D( I ) + SINR*E( I-1 ) + E( I-1 ) = COSR*E( I-1 ) - SINR*D( I ) + G = SINR*D( I-1 ) + D( I-1 ) = COSR*D( I-1 ) + CALL SLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I-1 ) + SINL*D( I-1 ) + D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 ) + IF( I.GT.LL+1 ) THEN + G = SINL*E( I-2 ) + E( I-2 ) = COSL*E( I-2 ) + END IF + WORK( I-LL ) = COSR + WORK( I-LL+NM1 ) = -SINR + WORK( I-LL+NM12 ) = COSL + WORK( I-LL+NM13 ) = -SINL + 150 CONTINUE + E( LL ) = F +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO +* +* Update singular vectors if desired +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ), + $ WORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ), + $ WORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ), + $ WORK( N ), C( LL, 1 ), LDC ) + END IF + END IF +* +* QR iteration finished, go back and check convergence +* + GO TO 60 +* +* All singular values converged, so make them positive +* + 160 CONTINUE + DO 170 I = 1, N + IF( D( I ).LT.ZERO ) THEN + D( I ) = -D( I ) +* +* Change sign of singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL SSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT ) + END IF + 170 CONTINUE +* +* Sort the singular values into decreasing order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 190 I = 1, N - 1 +* +* Scan for smallest D(I) +* + ISUB = 1 + SMIN = D( 1 ) + DO 180 J = 2, N + 1 - I + IF( D( J ).LE.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 180 CONTINUE + IF( ISUB.NE.N+1-I ) THEN +* +* Swap singular values and vectors +* + D( ISUB ) = D( N+1-I ) + D( N+1-I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL SSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ), + $ LDVT ) + IF( NRU.GT.0 ) + $ CALL SSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 ) + IF( NCC.GT.0 ) + $ CALL 
SSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC ) + END IF + 190 CONTINUE + GO TO 220 +* +* Maximum number of iterations exceeded, failure to converge +* + 200 CONTINUE + INFO = 0 + DO 210 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 210 CONTINUE + 220 CONTINUE + RETURN +* +* End of SBDSQR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/scsum1.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/scsum1.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,81 @@ + REAL FUNCTION SCSUM1( N, CX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX CX( * ) +* .. +* +* Purpose +* ======= +* +* SCSUM1 takes the sum of the absolute values of a complex +* vector and returns a single precision result. +* +* Based on SCASUM from the Level 1 BLAS. +* The change is to use the 'genuine' absolute value. +* +* Contributed by Nick Higham for use with CLACON. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements in the vector CX. +* +* CX (input) COMPLEX array, dimension (N) +* The vector whose elements will be summed. +* +* INCX (input) INTEGER +* The spacing between successive values of CX. INCX > 0. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, NINCX + REAL STEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + SCSUM1 = 0.0E0 + STEMP = 0.0E0 + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 ) + $ GO TO 20 +* +* CODE FOR INCREMENT NOT EQUAL TO 1 +* + NINCX = N*INCX + DO 10 I = 1, NINCX, INCX +* +* NEXT LINE MODIFIED. +* + STEMP = STEMP + ABS( CX( I ) ) + 10 CONTINUE + SCSUM1 = STEMP + RETURN +* +* CODE FOR INCREMENT EQUAL TO 1 +* + 20 CONTINUE + DO 30 I = 1, N +* +* NEXT LINE MODIFIED. 
+* + STEMP = STEMP + ABS( CX( I ) ) + 30 CONTINUE + SCSUM1 = STEMP + RETURN +* +* End of SCSUM1 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgbcon.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgbcon.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,226 @@ + SUBROUTINE SGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* Modified to call SLACN2 in place of SLACON, 7 Feb 03, SJH. +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, KL, KU, LDAB, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGBCON estimates the reciprocal of the condition number of a real +* general band matrix A, in either the 1-norm or the infinity-norm, +* using the LU factorization computed by SGBTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* Details of the LU factorization of the band matrix A, as +* computed by SGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. 
LDAB >= 2*KL+KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= N, row i of the matrix was +* interchanged with row IPIV(i). +* +* ANORM (input) REAL +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, ONENRM + CHARACTER NORMIN + INTEGER IX, J, JP, KASE, KASE1, KD, LM + REAL AINVNM, SCALE, SMLNUM, T +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SLACN2, SLATBS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). 
+* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KD = KL + KU + 1 + LNOTI = KL.GT.0 + KASE = 0 + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + IF( LNOTI ) THEN + DO 20 J = 1, N - 1 + LM = MIN( KL, N-J ) + JP = IPIV( J ) + T = WORK( JP ) + IF( JP.NE.J ) THEN + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + CALL SAXPY( LM, -T, AB( KD+1, J ), 1, WORK( J+1 ), 1 ) + 20 CONTINUE + END IF +* +* Multiply by inv(U). +* + CALL SLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), + $ INFO ) + ELSE +* +* Multiply by inv(U'). +* + CALL SLATBS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), + $ INFO ) +* +* Multiply by inv(L'). +* + IF( LNOTI ) THEN + DO 30 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + WORK( J ) = WORK( J ) - SDOT( LM, AB( KD+1, J ), 1, + $ WORK( J+1 ), 1 ) + JP = IPIV( J ) + IF( JP.NE.J ) THEN + T = WORK( JP ) + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + 30 CONTINUE + END IF + END IF +* +* Divide X by 1/SCALE if doing so will not cause overflow. +* + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 40 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 40 CONTINUE + RETURN +* +* End of SGBCON +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgbtf2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgbtf2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,202 @@ + SUBROUTINE SGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. 
Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* SGBTF2 computes an LU factorization of a real m-by-n band matrix A +* using partial pivoting with row interchanges. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. 
+* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U, because of fill-in resulting from the row +* interchanges. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JP, JU, KM, KV +* .. +* .. External Functions .. + INTEGER ISAMAX + EXTERNAL ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SGER, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in. +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero. +* + DO 20 J = KU + 2, MIN( KV, N ) + DO 10 I = KV - J + 2, KL + AB( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* JU is the index of the last column affected by the current stage +* of the factorization. 
+* + JU = 1 +* + DO 40 J = 1, MIN( M, N ) +* +* Set fill-in elements in column J+KV to zero. +* + IF( J+KV.LE.N ) THEN + DO 30 I = 1, KL + AB( I, J+KV ) = ZERO + 30 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-J ) + JP = ISAMAX( KM+1, AB( KV+1, J ), 1 ) + IPIV( J ) = JP + J - 1 + IF( AB( KV+JP, J ).NE.ZERO ) THEN + JU = MAX( JU, MIN( J+KU+JP-1, N ) ) +* +* Apply interchange to columns J to JU. +* + IF( JP.NE.1 ) + $ CALL SSWAP( JU-J+1, AB( KV+JP, J ), LDAB-1, + $ AB( KV+1, J ), LDAB-1 ) +* + IF( KM.GT.0 ) THEN +* +* Compute multipliers. +* + CALL SSCAL( KM, ONE / AB( KV+1, J ), AB( KV+2, J ), 1 ) +* +* Update trailing submatrix within the band. +* + IF( JU.GT.J ) + $ CALL SGER( KM, JU-J, -ONE, AB( KV+2, J ), 1, + $ AB( KV, J+1 ), LDAB-1, AB( KV+1, J+1 ), + $ LDAB-1 ) + END IF + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = J + END IF + 40 CONTINUE + RETURN +* +* End of SGBTF2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgbtrf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgbtrf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,441 @@ + SUBROUTINE SGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* SGBTRF computes an LU factorization of a real m-by-n band matrix A +* using partial pivoting with row interchanges. +* +* This is the blocked version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. 
N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U because of fill-in resulting from the row interchanges. 
+* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP, + $ JU, K2, KM, KV, NB, NW + REAL TEMP +* .. +* .. Local Arrays .. + REAL WORK13( LDWORK, NBMAX ), + $ WORK31( LDWORK, NBMAX ) +* .. +* .. External Functions .. + INTEGER ILAENV, ISAMAX + EXTERNAL ILAENV, ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGBTF2, SGEMM, SGER, SLASWP, SSCAL, + $ SSWAP, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'SGBTRF', ' ', M, N, KL, KU ) +* +* The block size must not exceed the limit set by the size of the +* local arrays WORK13 and WORK31. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. 
NB.GT.KL ) THEN +* +* Use unblocked code +* + CALL SGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) + ELSE +* +* Use blocked code +* +* Zero the superdiagonal elements of the work array WORK13 +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK13( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Zero the subdiagonal elements of the work array WORK31 +* + DO 40 J = 1, NB + DO 30 I = J + 1, NB + WORK31( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero +* + DO 60 J = KU + 2, MIN( KV, N ) + DO 50 I = KV - J + 2, KL + AB( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* +* JU is the index of the last column affected by the current +* stage of the factorization +* + JU = 1 +* + DO 180 J = 1, MIN( M, N ), NB + JB = MIN( NB, MIN( M, N )-J+1 ) +* +* The active part of the matrix is partitioned +* +* A11 A12 A13 +* A21 A22 A23 +* A31 A32 A33 +* +* Here A11, A21 and A31 denote the current block of JB columns +* which is about to be factorized. The number of rows in the +* partitioning are JB, I2, I3 respectively, and the numbers +* of columns are JB, J2, J3. The superdiagonal elements of A13 +* and the subdiagonal elements of A31 lie outside the band. +* + I2 = MIN( KL-JB, M-J-JB+1 ) + I3 = MIN( JB, M-J-KL+1 ) +* +* J2 and J3 are computed after JU has been updated. +* +* Factorize the current block of JB columns +* + DO 80 JJ = J, J + JB - 1 +* +* Set fill-in elements in column JJ+KV to zero +* + IF( JJ+KV.LE.N ) THEN + DO 70 I = 1, KL + AB( I, JJ+KV ) = ZERO + 70 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. 
+* + KM = MIN( KL, M-JJ ) + JP = ISAMAX( KM+1, AB( KV+1, JJ ), 1 ) + IPIV( JJ ) = JP + JJ - J + IF( AB( KV+JP, JJ ).NE.ZERO ) THEN + JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to J+JB-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* + CALL SSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange affects columns J to JJ-1 of A31 +* which are stored in the work array WORK31 +* + CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + CALL SSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1, + $ AB( KV+JP, JJ ), LDAB-1 ) + END IF + END IF +* +* Compute multipliers +* + CALL SSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), + $ 1 ) +* +* Update trailing submatrix within the band and within +* the current block. JM is the index of the last column +* which needs to be updated. +* + JM = MIN( JU, J+JB-1 ) + IF( JM.GT.JJ ) + $ CALL SGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, + $ AB( KV, JJ+1 ), LDAB-1, + $ AB( KV+1, JJ+1 ), LDAB-1 ) + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = JJ + END IF +* +* Copy current column of A31 into the work array WORK31 +* + NW = MIN( JJ-J+1, I3 ) + IF( NW.GT.0 ) + $ CALL SCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, + $ WORK31( 1, JJ-J+1 ), 1 ) + 80 CONTINUE + IF( J+JB.LE.N ) THEN +* +* Apply the row interchanges to the other blocks. +* + J2 = MIN( JU-J+1, KV ) - JB + J3 = MAX( 0, JU-J-KV+1 ) +* +* Use SLASWP to apply the row interchanges to A12, A22, and +* A32. +* + CALL SLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB, + $ IPIV( J ), 1 ) +* +* Adjust the pivot indices. +* + DO 90 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 90 CONTINUE +* +* Apply the row interchanges to A13, A23, and A33 +* columnwise. 
+* + K2 = J - 1 + JB + J2 + DO 110 I = 1, J3 + JJ = K2 + I + DO 100 II = J + I - 1, J + JB - 1 + IP = IPIV( II ) + IF( IP.NE.II ) THEN + TEMP = AB( KV+1+II-JJ, JJ ) + AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ ) + AB( KV+1+IP-JJ, JJ ) = TEMP + END IF + 100 CONTINUE + 110 CONTINUE +* +* Update the relevant part of the trailing submatrix +* + IF( J2.GT.0 ) THEN +* +* Update A12 +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A22 +* + CALL SGEMM( 'No transpose', 'No transpose', I2, J2, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+1, J+JB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A32 +* + CALL SGEMM( 'No transpose', 'No transpose', I3, J2, + $ JB, -ONE, WORK31, LDWORK, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) + END IF + END IF +* + IF( J3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array +* WORK13 +* + DO 130 JJ = 1, J3 + DO 120 II = JJ, JB + WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) + 120 CONTINUE + 130 CONTINUE +* +* Update A13 in the work array +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, + $ WORK13, LDWORK ) +* + IF( I2.GT.0 ) THEN +* +* Update A23 +* + CALL SGEMM( 'No transpose', 'No transpose', I2, J3, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), + $ LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A33 +* + CALL SGEMM( 'No transpose', 'No transpose', I3, J3, + $ JB, -ONE, WORK31, LDWORK, WORK13, + $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) + END IF +* +* Copy the lower triangle of A13 back into place +* + DO 150 JJ = 1, J3 + DO 140 II = JJ, JB + AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) + 140 CONTINUE + 150 CONTINUE + END IF + ELSE +* +* Adjust the pivot indices. 
+* + DO 160 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 160 CONTINUE + END IF +* +* Partially undo the interchanges in the current block to +* restore the upper triangular form of A31 and copy the upper +* triangle of A31 back into place +* + DO 170 JJ = J + JB - 1, J, -1 + JP = IPIV( JJ ) - JJ + 1 + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to JJ-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* +* The interchange does not affect A31 +* + CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange does affect A31 +* + CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + END IF + END IF +* +* Copy the current column of A31 back into place +* + NW = MIN( I3, JJ-J+1 ) + IF( NW.GT.0 ) + $ CALL SCOPY( NW, WORK31( 1, JJ-J+1 ), 1, + $ AB( KV+KL+1-JJ+J, JJ ), 1 ) + 170 CONTINUE + 180 CONTINUE + END IF +* + RETURN +* +* End of SGBTRF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgbtrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgbtrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,186 @@ + SUBROUTINE SGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SGBTRS solves a system of linear equations +* A * X = B or A' * X = B +* with a general band matrix A using the LU factorization computed +* by SGBTRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A'* X = B (Transpose) +* = 'C': A'* X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. 
+* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* Details of the LU factorization of the band matrix A, as +* computed by SGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= N, row i of the matrix was +* interchanged with row IPIV(i). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, NOTRAN + INTEGER I, J, KD, L, LM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER, SSWAP, STBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. 
+ $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.( 2*KL+KU+1 ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + KD = KU + KL + 1 + LNOTI = KL.GT.0 +* + IF( NOTRAN ) THEN +* +* Solve A*X = B. +* +* Solve L*X = B, overwriting B with X. +* +* L is represented as a product of permutations and unit lower +* triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), +* where each transformation L(i) is a rank-one modification of +* the identity matrix. +* + IF( LNOTI ) THEN + DO 10 J = 1, N - 1 + LM = MIN( KL, N-J ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL SSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + CALL SGER( LM, NRHS, -ONE, AB( KD+1, J ), 1, B( J, 1 ), + $ LDB, B( J+1, 1 ), LDB ) + 10 CONTINUE + END IF +* + DO 20 I = 1, NRHS +* +* Solve U*X = B, overwriting B with X. +* + CALL STBSV( 'Upper', 'No transpose', 'Non-unit', N, KL+KU, + $ AB, LDAB, B( 1, I ), 1 ) + 20 CONTINUE +* + ELSE +* +* Solve A'*X = B. +* + DO 30 I = 1, NRHS +* +* Solve U'*X = B, overwriting B with X. +* + CALL STBSV( 'Upper', 'Transpose', 'Non-unit', N, KL+KU, AB, + $ LDAB, B( 1, I ), 1 ) + 30 CONTINUE +* +* Solve L'*X = B, overwriting B with X. 
+* + IF( LNOTI ) THEN + DO 40 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL SGEMV( 'Transpose', LM, NRHS, -ONE, B( J+1, 1 ), + $ LDB, AB( KD+1, J ), 1, ONE, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL SSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 40 CONTINUE + END IF + END IF + RETURN +* +* End of SGBTRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgebak.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgebak.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,188 @@ + SUBROUTINE SGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + REAL V( LDV, * ), SCALE( * ) +* .. +* +* Purpose +* ======= +* +* SGEBAK forms the right or left eigenvectors of a real general matrix +* by backward transformation on the computed eigenvectors of the +* balanced matrix output by SGEBAL. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the type of backward transformation required: +* = 'N', do nothing, return immediately; +* = 'P', do backward transformation for permutation only; +* = 'S', do backward transformation for scaling only; +* = 'B', do backward transformations for both permutation and +* scaling. +* JOB must be the same as the argument JOB supplied to SGEBAL. +* +* SIDE (input) CHARACTER*1 +* = 'R': V contains right eigenvectors; +* = 'L': V contains left eigenvectors. +* +* N (input) INTEGER +* The number of rows of the matrix V. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* The integers ILO and IHI determined by SGEBAL. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* SCALE (input) REAL array, dimension (N) +* Details of the permutation and scaling factors, as returned +* by SGEBAL. 
+* +* M (input) INTEGER +* The number of columns of the matrix V. M >= 0. +* +* V (input/output) REAL array, dimension (LDV,M) +* On entry, the matrix of right or left eigenvectors to be +* transformed, as returned by SHSEIN or STREVC. +* On exit, V is overwritten by the transformed eigenvectors. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, II, K + REAL S +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -7 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. 
LSAME( JOB, 'B' ) ) THEN +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + S = SCALE( I ) + CALL SSCAL( M, S, V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + S = ONE / SCALE( I ) + CALL SSCAL( M, S, V( I, 1 ), LDV ) + 20 CONTINUE + END IF +* + END IF +* +* Backward permutation +* +* For I = ILO-1 step -1 until 1, +* IHI+1 step 1 until N do -- +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN + IF( RIGHTV ) THEN + DO 40 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 40 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 50 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 50 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 50 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 50 CONTINUE + END IF + END IF +* + RETURN +* +* End of SGEBAK +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgebal.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgebal.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,322 @@ + SUBROUTINE SGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), SCALE( * ) +* .. +* +* Purpose +* ======= +* +* SGEBAL balances a general real matrix A. This involves, first, +* permuting A by a similarity transformation to isolate eigenvalues +* in the first 1 to ILO-1 and last IHI+1 to N elements on the +* diagonal; and second, applying a diagonal similarity transformation +* to rows and columns ILO to IHI to make the rows and columns as +* close in norm as possible. Both steps are optional. 
+* +* Balancing may reduce the 1-norm of the matrix, and improve the +* accuracy of the computed eigenvalues and/or eigenvectors. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the operations to be performed on A: +* = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 +* for i = 1,...,N; +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the input matrix A. +* On exit, A is overwritten by the balanced matrix. +* If JOB = 'N', A is not referenced. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ILO (output) INTEGER +* IHI (output) INTEGER +* ILO and IHI are set to integers such that on exit +* A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. +* If JOB = 'N' or 'S', ILO = 1 and IHI = N. +* +* SCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied to +* A. If P(j) is the index of the row and column interchanged +* with row and column j and D(j) is the scaling factor +* applied to row and column j, then +* SCALE(j) = P(j) for j = 1,...,ILO-1 +* = D(j) for j = ILO,...,IHI +* = P(j) for j = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The permutations consist of row and column interchanges which put +* the matrix in the form +* +* ( T1 X Y ) +* P A P = ( 0 B Z ) +* ( 0 0 T2 ) +* +* where T1 and T2 are upper triangular matrices whose eigenvalues lie +* along the diagonal. The column indices ILO and IHI mark the starting +* and ending columns of the submatrix B. 
Balancing consists of applying +* a diagonal similarity transformation inv(D) * B * D to make the +* 1-norms of each row of B and its corresponding column nearly equal. +* The output matrix is +* +* ( T1 X*D Y ) +* ( 0 inv(D)*B*D inv(D)*Z ). +* ( 0 0 T2 ) +* +* Information about the permutations P and the diagonal matrix D is +* returned in the vector SCALE. +* +* This subroutine is based on the EISPACK routine BALANC. +* +* Modified by Tzu-Yi Chen, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL SCLFAC + PARAMETER ( SCLFAC = 2.0E+0 ) + REAL FACTOR + PARAMETER ( FACTOR = 0.95E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOCONV + INTEGER I, ICA, IEXC, IRA, J, K, L, M + REAL C, CA, F, G, R, RA, S, SFMAX1, SFMAX2, SFMIN1, + $ SFMIN2 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEBAL', -INFO ) + RETURN + END IF +* + K = 1 + L = N +* + IF( N.EQ.0 ) + $ GO TO 210 +* + IF( LSAME( JOB, 'N' ) ) THEN + DO 10 I = 1, N + SCALE( I ) = ONE + 10 CONTINUE + GO TO 210 + END IF +* + IF( LSAME( JOB, 'S' ) ) + $ GO TO 120 +* +* Permutation to isolate eigenvalues if possible +* + GO TO 50 +* +* Row and column exchange. 
+* + 20 CONTINUE + SCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 30 +* + CALL SSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL SSWAP( N-K+1, A( J, K ), LDA, A( M, K ), LDA ) +* + 30 CONTINUE + GO TO ( 40, 80 )IEXC +* +* Search for rows isolating an eigenvalue and push them down. +* + 40 CONTINUE + IF( L.EQ.1 ) + $ GO TO 210 + L = L - 1 +* + 50 CONTINUE + DO 70 J = L, 1, -1 +* + DO 60 I = 1, L + IF( I.EQ.J ) + $ GO TO 60 + IF( A( J, I ).NE.ZERO ) + $ GO TO 70 + 60 CONTINUE +* + M = L + IEXC = 1 + GO TO 20 + 70 CONTINUE +* + GO TO 90 +* +* Search for columns isolating an eigenvalue and push them left. +* + 80 CONTINUE + K = K + 1 +* + 90 CONTINUE + DO 110 J = K, L +* + DO 100 I = K, L + IF( I.EQ.J ) + $ GO TO 100 + IF( A( I, J ).NE.ZERO ) + $ GO TO 110 + 100 CONTINUE +* + M = K + IEXC = 2 + GO TO 20 + 110 CONTINUE +* + 120 CONTINUE + DO 130 I = K, L + SCALE( I ) = ONE + 130 CONTINUE +* + IF( LSAME( JOB, 'P' ) ) + $ GO TO 210 +* +* Balance the submatrix in rows K to L. +* +* Iterative loop for norm reduction +* + SFMIN1 = SLAMCH( 'S' ) / SLAMCH( 'P' ) + SFMAX1 = ONE / SFMIN1 + SFMIN2 = SFMIN1*SCLFAC + SFMAX2 = ONE / SFMIN2 + 140 CONTINUE + NOCONV = .FALSE. +* + DO 200 I = K, L + C = ZERO + R = ZERO +* + DO 150 J = K, L + IF( J.EQ.I ) + $ GO TO 150 + C = C + ABS( A( J, I ) ) + R = R + ABS( A( I, J ) ) + 150 CONTINUE + ICA = ISAMAX( L, A( 1, I ), 1 ) + CA = ABS( A( ICA, I ) ) + IRA = ISAMAX( N-K+1, A( I, K ), LDA ) + RA = ABS( A( I, IRA+K-1 ) ) +* +* Guard against zero C or R due to underflow. +* + IF( C.EQ.ZERO .OR. R.EQ.ZERO ) + $ GO TO 200 + G = R / SCLFAC + F = ONE + S = C + R + 160 CONTINUE + IF( C.GE.G .OR. MAX( F, C, CA ).GE.SFMAX2 .OR. + $ MIN( R, G, RA ).LE.SFMIN2 )GO TO 170 + F = F*SCLFAC + C = C*SCLFAC + CA = CA*SCLFAC + R = R / SCLFAC + G = G / SCLFAC + RA = RA / SCLFAC + GO TO 160 +* + 170 CONTINUE + G = C / SCLFAC + 180 CONTINUE + IF( G.LT.R .OR. MAX( R, RA ).GE.SFMAX2 .OR. 
+ $ MIN( F, C, G, CA ).LE.SFMIN2 )GO TO 190 + F = F / SCLFAC + C = C / SCLFAC + G = G / SCLFAC + CA = CA / SCLFAC + R = R*SCLFAC + RA = RA*SCLFAC + GO TO 180 +* +* Now balance. +* + 190 CONTINUE + IF( ( C+R ).GE.FACTOR*S ) + $ GO TO 200 + IF( F.LT.ONE .AND. SCALE( I ).LT.ONE ) THEN + IF( F*SCALE( I ).LE.SFMIN1 ) + $ GO TO 200 + END IF + IF( F.GT.ONE .AND. SCALE( I ).GT.ONE ) THEN + IF( SCALE( I ).GE.SFMAX1 / F ) + $ GO TO 200 + END IF + G = ONE / F + SCALE( I ) = SCALE( I )*F + NOCONV = .TRUE. +* + CALL SSCAL( N-K+1, G, A( I, K ), LDA ) + CALL SSCAL( L, F, A( 1, I ), 1 ) +* + 200 CONTINUE +* + IF( NOCONV ) + $ GO TO 140 +* + 210 CONTINUE + ILO = K + IHI = L +* + RETURN +* +* End of SGEBAL +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgebd2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgebd2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,239 @@ + SUBROUTINE SGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEBD2 reduces a real general m by n matrix A to upper or lower +* bidiagonal form B by an orthogonal transformation: Q' * A * P = B. +* +* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns in the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the m by n general matrix to be reduced. 
+* On exit, +* if m >= n, the diagonal and the first superdiagonal are +* overwritten with the upper bidiagonal matrix B; the +* elements below the diagonal, with the array TAUQ, represent +* the orthogonal matrix Q as a product of elementary +* reflectors, and the elements above the first superdiagonal, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors; +* if m < n, the diagonal and the first subdiagonal are +* overwritten with the lower bidiagonal matrix B; the +* elements below the first subdiagonal, with the array TAUQ, +* represent the orthogonal matrix Q as a product of +* elementary reflectors, and the elements above the diagonal, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) REAL array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (min(M,N)-1) +* The off-diagonal elements of the bidiagonal matrix B: +* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +* +* TAUQ (output) REAL array dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Q. See Further Details. +* +* TAUP (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix P. See Further Details. +* +* WORK (workspace) REAL array, dimension (max(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* If m >= n, +* +* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . 
G(n-1) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors; +* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); +* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, +* +* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors; +* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The contents of A on exit are illustrated by the following examples: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +* ( v1 v2 v3 v4 v5 ) +* +* where d and e denote diagonal and off-diagonal elements of B, vi +* denotes an element of the vector defining H(i), and ui an element of +* the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. 
+* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'SGEBD2', -INFO ) + RETURN + END IF +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, N +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL SLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = A( I, I ) + A( I, I ) = ONE +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + IF( I.LT.N ) + $ CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ), + $ A( I, I+1 ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.N ) THEN +* +* Generate elementary reflector G(i) to annihilate +* A(i,i+2:n) +* + CALL SLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = A( I, I+1 ) + A( I, I+1 ) = ONE +* +* Apply G(i) to A(i+1:m,i+1:n) from the right +* + CALL SLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, + $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) + A( I, I+1 ) = E( I ) + ELSE + TAUP( I ) = ZERO + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, M +* +* Generate elementary reflector G(i) to annihilate A(i,i+1:n) +* + CALL SLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = A( I, I ) + A( I, I ) = ONE +* +* Apply G(i) to A(i+1:m,i:n) from the right +* + IF( I.LT.M ) + $ CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAUP( I ), A( I+1, I ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.M ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:m,i) +* + CALL SLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(i+1:m,i+1:n) from the left +* + CALL SLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ), + $ A( I+1, I+1 ), LDA, WORK ) + A( I+1, I ) = E( I ) + ELSE + TAUQ( I ) = ZERO + END IF + 20 CONTINUE + END IF + RETURN +* 
+* End of SGEBD2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgebrd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgebrd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,268 @@ + SUBROUTINE SGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEBRD reduces a general real M-by-N matrix A to upper or lower +* bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. +* +* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns in the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N general matrix to be reduced. +* On exit, +* if m >= n, the diagonal and the first superdiagonal are +* overwritten with the upper bidiagonal matrix B; the +* elements below the diagonal, with the array TAUQ, represent +* the orthogonal matrix Q as a product of elementary +* reflectors, and the elements above the first superdiagonal, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors; +* if m < n, the diagonal and the first subdiagonal are +* overwritten with the lower bidiagonal matrix B; the +* elements below the first subdiagonal, with the array TAUQ, +* represent the orthogonal matrix Q as a product of +* elementary reflectors, and the elements above the diagonal, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. 
LDA >= max(1,M). +* +* D (output) REAL array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (min(M,N)-1) +* The off-diagonal elements of the bidiagonal matrix B: +* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +* +* TAUQ (output) REAL array dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Q. See Further Details. +* +* TAUP (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix P. See Further Details. +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,M,N). +* For optimum performance LWORK >= (M+N)*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* If m >= n, +* +* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors; +* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); +* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, +* +* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . 
G(m) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors; +* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The contents of A on exit are illustrated by the following examples: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +* ( v1 v2 v3 v4 v5 ) +* +* where d and e denote diagonal and off-diagonal elements of B, vi +* denotes an element of the vector defining H(i), and ui an element of +* the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, + $ NBMIN, NX + REAL WS +* .. +* .. External Subroutines .. + EXTERNAL SGEBD2, SGEMM, SLABRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MAX( 1, ILAENV( 1, 'SGEBRD', ' ', M, N, -1, -1 ) ) + LWKOPT = ( M+N )*NB + WORK( 1 ) = REAL( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. 
.NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'SGEBRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + WS = MAX( M, N ) + LDWRKX = M + LDWRKY = N +* + IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN +* +* Set the crossover point NX. +* + NX = MAX( NB, ILAENV( 3, 'SGEBRD', ' ', M, N, -1, -1 ) ) +* +* Determine when to switch from blocked to unblocked code. +* + IF( NX.LT.MINMN ) THEN + WS = ( M+N )*NB + IF( LWORK.LT.WS ) THEN +* +* Not enough work space for the optimal NB, consider using +* a smaller block size. +* + NBMIN = ILAENV( 2, 'SGEBRD', ' ', M, N, -1, -1 ) + IF( LWORK.GE.( M+N )*NBMIN ) THEN + NB = LWORK / ( M+N ) + ELSE + NB = 1 + NX = MINMN + END IF + END IF + END IF + ELSE + NX = MINMN + END IF +* + DO 30 I = 1, MINMN - NX, NB +* +* Reduce rows and columns i:i+nb-1 to bidiagonal form and return +* the matrices X and Y which are needed to update the unreduced +* part of the matrix +* + CALL SLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, LDWRKX, + $ WORK( LDWRKX*NB+1 ), LDWRKY ) +* +* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update +* of the form A := A - V*Y' - X*U' +* + CALL SGEMM( 'No transpose', 'Transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, A( I+NB, I ), LDA, + $ WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE, + $ A( I+NB, I+NB ), LDA ) + CALL SGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA, + $ ONE, A( I+NB, I+NB ), LDA ) +* +* Copy diagonal and off-diagonal elements of B back into A +* + IF( M.GE.N ) THEN + DO 10 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J, J+1 ) = E( J ) + 10 CONTINUE + ELSE + DO 20 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J+1, J ) = E( J ) + 20 CONTINUE + END IF + 30 CONTINUE +* +* Use unblocked code to reduce the remainder of the matrix +* + CALL SGEBD2( M-I+1, 
N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, IINFO ) + WORK( 1 ) = WS + RETURN +* +* End of SGEBRD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgecon.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgecon.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,185 @@ + SUBROUTINE SGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* Modified to call SLACN2 in place of SLACON, 7 Feb 03, SJH. +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGECON estimates the reciprocal of the condition number of a general +* real matrix A, in either the 1-norm or the infinity-norm, using +* the LU factorization computed by SGETRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by SGETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) REAL +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). 
+* +* WORK (workspace) REAL array, dimension (4*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, SCALE, SL, SMLNUM, SU +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SLATRS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + CALL SLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, WORK( 2*N+1 ), INFO ) +* +* Multiply by inv(U). +* + CALL SLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SU, WORK( 3*N+1 ), INFO ) + ELSE +* +* Multiply by inv(U'). 
+* + CALL SLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SU, WORK( 3*N+1 ), INFO ) +* +* Multiply by inv(L'). +* + CALL SLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, WORK( 2*N+1 ), INFO ) + END IF +* +* Divide X by 1/(SL*SU) if doing so will not cause overflow. +* + SCALE = SL*SU + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of SGECON +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgeesx.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgeesx.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,527 @@ + SUBROUTINE SGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, + $ WR, WI, VS, LDVS, RCONDE, RCONDV, WORK, LWORK, + $ IWORK, LIWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SENSE, SORT + INTEGER INFO, LDA, LDVS, LIWORK, LWORK, N, SDIM + REAL RCONDE, RCONDV +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + REAL A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), + $ WR( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* Purpose +* ======= +* +* SGEESX computes for an N-by-N real nonsymmetric matrix A, the +* eigenvalues, the real Schur form T, and, optionally, the matrix of +* Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). 
+* +* Optionally, it also orders the eigenvalues on the diagonal of the +* real Schur form so that selected eigenvalues are at the top left; +* computes a reciprocal condition number for the average of the +* selected eigenvalues (RCONDE); and computes a reciprocal condition +* number for the right invariant subspace corresponding to the +* selected eigenvalues (RCONDV). The leading columns of Z form an +* orthonormal basis for this invariant subspace. +* +* For further explanation of the reciprocal condition numbers RCONDE +* and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where +* these quantities are called s and sep respectively). +* +* A real matrix is in real Schur form if it is upper quasi-triangular +* with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in +* the form +* [ a b ] +* [ c a ] +* +* where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). +* +* Arguments +* ========= +* +* JOBVS (input) CHARACTER*1 +* = 'N': Schur vectors are not computed; +* = 'V': Schur vectors are computed. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see SELECT). +* +* SELECT (external procedure) LOGICAL FUNCTION of two REAL arguments +* SELECT must be declared EXTERNAL in the calling subroutine. +* If SORT = 'S', SELECT is used to select eigenvalues to sort +* to the top left of the Schur form. +* If SORT = 'N', SELECT is not referenced. +* An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if +* SELECT(WR(j),WI(j)) is true; i.e., if either one of a +* complex conjugate pair of eigenvalues is selected, then both +* are. Note that a selected complex eigenvalue may no longer +* satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since +* ordering may change the value of complex eigenvalues +* (especially if the eigenvalue is ill-conditioned); in this +* case INFO may be set to N+3 (see INFO below). 
+* +* SENSE (input) CHARACTER*1 +* Determines which reciprocal condition numbers are computed. +* = 'N': None are computed; +* = 'E': Computed for average of selected eigenvalues only; +* = 'V': Computed for selected right invariant subspace only; +* = 'B': Computed for both. +* If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the N-by-N matrix A. +* On exit, A is overwritten by its real Schur form T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues (after sorting) +* for which SELECT is true. (Complex conjugate +* pairs for which SELECT is true for either +* eigenvalue count as 2.) +* +* WR (output) REAL array, dimension (N) +* WI (output) REAL array, dimension (N) +* WR and WI contain the real and imaginary parts, respectively, +* of the computed eigenvalues, in the same order that they +* appear on the diagonal of the output Schur form T. Complex +* conjugate pairs of eigenvalues appear consecutively with the +* eigenvalue having the positive imaginary part first. +* +* VS (output) REAL array, dimension (LDVS,N) +* If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur +* vectors. +* If JOBVS = 'N', VS is not referenced. +* +* LDVS (input) INTEGER +* The leading dimension of the array VS. LDVS >= 1, and if +* JOBVS = 'V', LDVS >= N. +* +* RCONDE (output) REAL +* If SENSE = 'E' or 'B', RCONDE contains the reciprocal +* condition number for the average of the selected eigenvalues. +* Not referenced if SENSE = 'N' or 'V'. +* +* RCONDV (output) REAL +* If SENSE = 'V' or 'B', RCONDV contains the reciprocal +* condition number for the selected right invariant subspace. +* Not referenced if SENSE = 'N' or 'E'. 
+* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,3*N). +* Also, if SENSE = 'E' or 'V' or 'B', +* LWORK >= N+2*SDIM*(N-SDIM), where SDIM is the number of +* selected eigenvalues computed by this routine. Note that +* N+2*SDIM*(N-SDIM) <= N+N*N/2. Note also that an error is only +* returned if LWORK < max(1,3*N), but if SENSE = 'E' or 'V' or +* 'B' this may not be large enough. +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates upper bounds on the optimal sizes of the +* arrays WORK and IWORK, returns these values as the first +* entries of the WORK and IWORK arrays, and no error messages +* related to LWORK or LIWORK are issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* LIWORK >= 1; if SENSE = 'V' or 'B', LIWORK >= SDIM*(N-SDIM). +* Note that SDIM*(N-SDIM) <= N*N/4. Note also that an error is +* only returned if LIWORK < 1, but if SENSE = 'V' or 'B' this +* may not be large enough. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates upper bounds on the optimal sizes of +* the arrays WORK and IWORK, returns these values as the first +* entries of the WORK and IWORK arrays, and no error messages +* related to LWORK or LIWORK are issued by XERBLA. +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. 
+* > 0: if INFO = i, and i is +* <= N: the QR algorithm failed to compute all the +* eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI +* contain those eigenvalues which have converged; if +* JOBVS = 'V', VS contains the transformation which +* reduces A to its partially converged Schur form. +* = N+1: the eigenvalues could not be reordered because some +* eigenvalues were too close to separate (the problem +* is very ill-conditioned); +* = N+2: after reordering, roundoff changed values of some +* complex eigenvalues so that leading eigenvalues in +* the Schur form no longer satisfy SELECT=.TRUE. This +* could also be caused by underflow due to scaling. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, LASTSL, LQUERY, LST2SL, SCALEA, WANTSB, + $ WANTSE, WANTSN, WANTST, WANTSV, WANTVS + INTEGER HSWORK, I, I1, I2, IBAL, ICOND, IERR, IEVAL, + $ IHI, ILO, INXT, IP, ITAU, IWRK, LWRK, LIWRK, + $ MAXWRK, MINWRK + REAL ANRM, BIGNUM, CSCALE, EPS, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, + $ SLACPY, SLASCL, SORGHR, SSWAP, STRSEN, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTSN .OR. 
WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -12 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "RWorkspace:" describe the +* minimal amount of real workspace needed at that point in the +* code, as well as the preferred amount for good performance. +* IWorkspace refers to integer workspace. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by SHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case. +* If SENSE = 'E', 'V' or 'B', then the amount of workspace needed +* depends on SDIM, which is computed by the routine STRSEN later +* in the code.) +* + IF( INFO.EQ.0 ) THEN + LIWRK = 1 + IF( N.EQ.0 ) THEN + MINWRK = 1 + LWRK = 1 + ELSE + MAXWRK = 2*N + N*ILAENV( 1, 'SGEHRD', ' ', N, 1, N, 0 ) + MINWRK = 3*N +* + CALL SHSEQR( 'S', JOBVS, N, 1, N, A, LDA, WR, WI, VS, LDVS, + $ WORK, -1, IEVAL ) + HSWORK = WORK( 1 ) +* + IF( .NOT.WANTVS ) THEN + MAXWRK = MAX( MAXWRK, N + HSWORK ) + ELSE + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'SORGHR', ' ', N, 1, N, -1 ) ) + MAXWRK = MAX( MAXWRK, N + HSWORK ) + END IF + LWRK = MAXWRK + IF( .NOT.WANTSN ) + $ LWRK = MAX( LWRK, N + ( N*N )/2 ) + IF( WANTSV .OR. WANTSB ) + $ LIWRK = ( N*N )/4 + END IF + IWORK( 1 ) = LIWRK + WORK( 1 ) = LWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -16 + ELSE IF( LIWORK.LT.1 .AND. 
.NOT.LQUERY ) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEESX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (RWorkspace: need N) +* + IBAL = 1 + CALL SGEBAL( 'P', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (RWorkspace: need 3*N, prefer 2*N+N*NB) +* + ITAU = N + IBAL + IWRK = N + ITAU + CALL SGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL SLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate orthogonal matrix in VS +* (RWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (RWorkspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, WR, WI, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. 
INFO.EQ.0 ) THEN + IF( SCALEA ) THEN + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WR, N, IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WI, N, IERR ) + END IF + DO 10 I = 1, N + BWORK( I ) = SELECT( WR( I ), WI( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Schur vectors, and compute +* reciprocal condition numbers +* (RWorkspace: if SENSE is not 'N', need N+2*SDIM*(N-SDIM) +* otherwise, need N ) +* (IWorkspace: if SENSE is 'V' or 'B', need SDIM*(N-SDIM) +* otherwise, need 0 ) +* + CALL STRSEN( SENSE, JOBVS, BWORK, N, A, LDA, VS, LDVS, WR, WI, + $ SDIM, RCONDE, RCONDV, WORK( IWRK ), LWORK-IWRK+1, + $ IWORK, LIWORK, ICOND ) + IF( .NOT.WANTSN ) + $ MAXWRK = MAX( MAXWRK, N+2*SDIM*( N-SDIM ) ) + IF( ICOND.EQ.-15 ) THEN +* +* Not enough real workspace +* + INFO = -16 + ELSE IF( ICOND.EQ.-17 ) THEN +* +* Not enough integer workspace +* + INFO = -18 + ELSE IF( ICOND.GT.0 ) THEN +* +* STRSEN failed to reorder or to restore standard Schur form +* + INFO = ICOND + N + END IF + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (RWorkspace: need N) +* + CALL SGEBAK( 'P', 'R', N, ILO, IHI, WORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL SLASCL( 'H', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL SCOPY( N, A, LDA+1, WR, 1 ) + IF( ( WANTSV .OR. WANTSB ) .AND. INFO.EQ.0 ) THEN + DUM( 1 ) = RCONDV + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + RCONDV = DUM( 1 ) + END IF + IF( CSCALE.EQ.SMLNUM ) THEN +* +* If scaling back towards underflow, adjust WI if an +* offdiagonal element of a 2-by-2 block in the Schur form +* underflows. 
+* + IF( IEVAL.GT.0 ) THEN + I1 = IEVAL + 1 + I2 = IHI - 1 + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N, + $ IERR ) + ELSE IF( WANTST ) THEN + I1 = 1 + I2 = N - 1 + ELSE + I1 = ILO + I2 = IHI - 1 + END IF + INXT = I1 - 1 + DO 20 I = I1, I2 + IF( I.LT.INXT ) + $ GO TO 20 + IF( WI( I ).EQ.ZERO ) THEN + INXT = I + 1 + ELSE + IF( A( I+1, I ).EQ.ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + ELSE IF( A( I+1, I ).NE.ZERO .AND. A( I, I+1 ).EQ. + $ ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + IF( I.GT.1 ) + $ CALL SSWAP( I-1, A( 1, I ), 1, A( 1, I+1 ), 1 ) + IF( N.GT.I+1 ) + $ CALL SSWAP( N-I-1, A( I, I+2 ), LDA, + $ A( I+1, I+2 ), LDA ) + CALL SSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 ) + A( I, I+1 ) = A( I+1, I ) + A( I+1, I ) = ZERO + END IF + INXT = I + 2 + END IF + 20 CONTINUE + END IF + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-IEVAL, 1, + $ WI( IEVAL+1 ), MAX( N-IEVAL, 1 ), IERR ) + END IF +* + IF( WANTST .AND. INFO.EQ.0 ) THEN +* +* Check if reordering successful +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 30 I = 1, N + CURSL = SELECT( WR( I ), WI( I ) ) + IF( WI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 30 CONTINUE + END IF +* + WORK( 1 ) = MAXWRK + IF( WANTSV .OR. 
WANTSB ) THEN + IWORK( 1 ) = SDIM*(N-SDIM) + ELSE + IWORK( 1 ) = 1 + END IF +* + RETURN +* +* End of SGEESX +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgeev.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgeev.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,423 @@ + SUBROUTINE SGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, + $ LDVR, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WI( * ), WORK( * ), WR( * ) +* .. +* +* Purpose +* ======= +* +* SGEEV computes for an N-by-N real nonsymmetric matrix A, the +* eigenvalues and, optionally, the left and/or right eigenvectors. +* +* The right eigenvector v(j) of A satisfies +* A * v(j) = lambda(j) * v(j) +* where lambda(j) is its eigenvalue. +* The left eigenvector u(j) of A satisfies +* u(j)**H * A = lambda(j) * u(j)**H +* where u(j)**H denotes the conjugate transpose of u(j). +* +* The computed eigenvectors are normalized to have Euclidean norm +* equal to 1 and largest component real. +* +* Arguments +* ========= +* +* JOBVL (input) CHARACTER*1 +* = 'N': left eigenvectors of A are not computed; +* = 'V': left eigenvectors of A are computed. +* +* JOBVR (input) CHARACTER*1 +* = 'N': right eigenvectors of A are not computed; +* = 'V': right eigenvectors of A are computed. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the N-by-N matrix A. +* On exit, A has been overwritten. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). 
+* +* WR (output) REAL array, dimension (N) +* WI (output) REAL array, dimension (N) +* WR and WI contain the real and imaginary parts, +* respectively, of the computed eigenvalues. Complex +* conjugate pairs of eigenvalues appear consecutively +* with the eigenvalue having the positive imaginary part +* first. +* +* VL (output) REAL array, dimension (LDVL,N) +* If JOBVL = 'V', the left eigenvectors u(j) are stored one +* after another in the columns of VL, in the same order +* as their eigenvalues. +* If JOBVL = 'N', VL is not referenced. +* If the j-th eigenvalue is real, then u(j) = VL(:,j), +* the j-th column of VL. +* If the j-th and (j+1)-st eigenvalues form a complex +* conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and +* u(j+1) = VL(:,j) - i*VL(:,j+1). +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1; if +* JOBVL = 'V', LDVL >= N. +* +* VR (output) REAL array, dimension (LDVR,N) +* If JOBVR = 'V', the right eigenvectors v(j) are stored one +* after another in the columns of VR, in the same order +* as their eigenvalues. +* If JOBVR = 'N', VR is not referenced. +* If the j-th eigenvalue is real, then v(j) = VR(:,j), +* the j-th column of VR. +* If the j-th and (j+1)-st eigenvalues form a complex +* conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and +* v(j+1) = VR(:,j) - i*VR(:,j+1). +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1; if +* JOBVR = 'V', LDVR >= N. +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,3*N), and +* if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good +* performance, LWORK must generally be larger. 
+* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the QR algorithm failed to compute all the +* eigenvalues, and no eigenvectors have been computed; +* elements i+1:N of WR and WI contain eigenvalues which +* have converged. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR + CHARACTER SIDE + INTEGER HSWORK, I, IBAL, IERR, IHI, ILO, ITAU, IWRK, K, + $ MAXWRK, MINWRK, NOUT + REAL ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM, + $ SN +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, SLACPY, + $ SLARTG, SLASCL, SORGHR, SROT, SSCAL, STREVC, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV, ISAMAX + REAL SLAMCH, SLANGE, SLAPY2, SNRM2 + EXTERNAL LSAME, ILAENV, ISAMAX, SLAMCH, SLANGE, SLAPY2, + $ SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -9 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. 
LDVR.LT.N ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by SHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MAXWRK = 2*N + N*ILAENV( 1, 'SGEHRD', ' ', N, 1, N, 0 ) + IF( WANTVL ) THEN + MINWRK = 4*N + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'SORGHR', ' ', N, 1, N, -1 ) ) + CALL SHSEQR( 'S', 'V', N, 1, N, A, LDA, WR, WI, VL, LDVL, + $ WORK, -1, INFO ) + HSWORK = WORK( 1 ) + MAXWRK = MAX( MAXWRK, N + 1, N + HSWORK ) + MAXWRK = MAX( MAXWRK, 4*N ) + ELSE IF( WANTVR ) THEN + MINWRK = 4*N + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'SORGHR', ' ', N, 1, N, -1 ) ) + CALL SHSEQR( 'S', 'V', N, 1, N, A, LDA, WR, WI, VR, LDVR, + $ WORK, -1, INFO ) + HSWORK = WORK( 1 ) + MAXWRK = MAX( MAXWRK, N + 1, N + HSWORK ) + MAXWRK = MAX( MAXWRK, 4*N ) + ELSE + MINWRK = 3*N + CALL SHSEQR( 'E', 'N', N, 1, N, A, LDA, WR, WI, VR, LDVR, + $ WORK, -1, INFO ) + HSWORK = WORK( 1 ) + MAXWRK = MAX( MAXWRK, N + 1, N + HSWORK ) + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. 
.NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix +* (Workspace: need N) +* + IBAL = 1 + CALL SGEBAL( 'B', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (Workspace: need 3*N, prefer 2*N+N*NB) +* + ITAU = IBAL + N + IWRK = ITAU + N + CALL SGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL SLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate orthogonal matrix in VL +* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL SLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL SLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* 
Generate orthogonal matrix in VR +* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'E', 'N', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO > 0 from SHSEQR, then quit +* + IF( INFO.GT.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (Workspace: need 4*N) +* + CALL STREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), IERR ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* (Workspace: need N) +* + CALL SGEBAK( 'B', 'L', N, ILO, IHI, WORK( IBAL ), N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / SNRM2( N, VL( 1, I ), 1 ) + CALL SSCAL( N, SCL, VL( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / SLAPY2( SNRM2( N, VL( 1, I ), 1 ), + $ SNRM2( N, VL( 1, I+1 ), 1 ) ) + CALL SSCAL( N, SCL, VL( 1, I ), 1 ) + CALL SSCAL( N, SCL, VL( 1, I+1 ), 1 ) + DO 10 K = 1, N + WORK( IWRK+K-1 ) = VL( K, I )**2 + VL( K, I+1 )**2 + 10 CONTINUE + K = ISAMAX( N, WORK( IWRK ), 1 ) + CALL SLARTG( VL( K, I ), VL( K, I+1 ), CS, SN, R ) + CALL SROT( N, VL( 1, I ), 1, VL( 1, I+1 ), 1, CS, SN ) + VL( K, I+1 ) = ZERO + END IF + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* (Workspace: need N) +* + CALL SGEBAK( 'B', 'R', N, ILO, IHI, WORK( IBAL ), N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest 
component real +* + DO 40 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / SNRM2( N, VR( 1, I ), 1 ) + CALL SSCAL( N, SCL, VR( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / SLAPY2( SNRM2( N, VR( 1, I ), 1 ), + $ SNRM2( N, VR( 1, I+1 ), 1 ) ) + CALL SSCAL( N, SCL, VR( 1, I ), 1 ) + CALL SSCAL( N, SCL, VR( 1, I+1 ), 1 ) + DO 30 K = 1, N + WORK( IWRK+K-1 ) = VR( K, I )**2 + VR( K, I+1 )**2 + 30 CONTINUE + K = ISAMAX( N, WORK( IWRK ), 1 ) + CALL SLARTG( VR( K, I ), VR( K, I+1 ), CS, SN, R ) + CALL SROT( N, VR( 1, I ), 1, VR( 1, I+1 ), 1, CS, SN ) + VR( K, I+1 ) = ZERO + END IF + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WR( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WI( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.GT.0 ) THEN + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WR, N, + $ IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N, + $ IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of SGEEV +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgehd2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgehd2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,149 @@ + SUBROUTINE SGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEHD2 reduces a real general matrix A to upper Hessenberg form H by +* an orthogonal similarity transformation: Q' * A * Q = H . +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. 
+* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to SGEBAL; otherwise they should be +* set to 1 and N respectively. See Further Details. +* 1 <= ILO <= IHI <= max(1,N). +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the n by n general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* elements below the first subdiagonal, with the array TAU, +* represent the orthogonal matrix Q as a product of elementary +* reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) REAL array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of (ihi-ilo) elementary +* reflectors +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +* exit in A(i+2:ihi,i), and tau in TAU(i). 
+* +* The contents of A are illustrated by the following example, with +* n = 7, ilo = 2 and ihi = 6: +* +* on entry, on exit, +* +* ( a a a a a a a ) ( a a h h h h a ) +* ( a a a a a a ) ( a h h h h a ) +* ( a a a a a a ) ( h h h h h h ) +* ( a a a a a a ) ( v2 h h h h h ) +* ( a a a a a a ) ( v2 v3 h h h h ) +* ( a a a a a a ) ( v2 v3 v4 h h h ) +* ( a ) ( a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL AII +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. 
IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEHD2', -INFO ) + RETURN + END IF +* + DO 10 I = ILO, IHI - 1 +* +* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) +* + CALL SLARFG( IHI-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + AII = A( I+1, I ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(1:ihi,i+1:ihi) from the right +* + CALL SLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), + $ A( 1, I+1 ), LDA, WORK ) +* +* Apply H(i) to A(i+1:ihi,i+1:n) from the left +* + CALL SLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, TAU( I ), + $ A( I+1, I+1 ), LDA, WORK ) +* + A( I+1, I ) = AII + 10 CONTINUE +* + RETURN +* +* End of SGEHD2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgehrd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgehrd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,273 @@ + SUBROUTINE SGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEHRD reduces a real general matrix A to upper Hessenberg form H by +* an orthogonal similarity transformation: Q' * A * Q = H . +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to SGEBAL; otherwise they should be +* set to 1 and N respectively. See Further Details. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the N-by-N general matrix to be reduced. 
+* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* elements below the first subdiagonal, with the array TAU, +* represent the orthogonal matrix Q as a product of elementary +* reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) REAL array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to +* zero. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of (ihi-ilo) elementary +* reflectors +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +* exit in A(i+2:ihi,i), and tau in TAU(i). 
+* +* The contents of A are illustrated by the following example, with +* n = 7, ilo = 2 and ihi = 6: +* +* on entry, on exit, +* +* ( a a a a a a a ) ( a a h h h h a ) +* ( a a a a a a ) ( a h h h h a ) +* ( a a a a a a ) ( h h h h h h ) +* ( a a a a a a ) ( v2 h h h h h ) +* ( a a a a a a ) ( v2 v3 h h h h ) +* ( a a a a a a ) ( v2 v3 v4 h h h ) +* ( a ) ( a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* This file is a slight modification of LAPACK-3.0's SGEHRD +* subroutine incorporating improvements proposed by Quintana-Orti and +* Van de Geijn (2005). +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, + $ ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB, + $ NBMIN, NH, NX + REAL EI +* .. +* .. Local Arrays .. + REAL T( LDT, NBMAX ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SGEHD2, SGEMM, SLAHR2, SLARFB, STRMM, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. 
.NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEHRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero +* + DO 10 I = 1, ILO - 1 + TAU( I ) = ZERO + 10 CONTINUE + DO 20 I = MAX( 1, IHI ), N - 1 + TAU( I ) = ZERO + 20 CONTINUE +* +* Quick return if possible +* + NH = IHI - ILO + 1 + IF( NH.LE.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the block size +* + NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) ) + NBMIN = 2 + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.NH ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code) +* + NX = MAX( NB, ILAENV( 3, 'SGEHRD', ' ', N, ILO, IHI, -1 ) ) + IF( NX.LT.NH ) THEN +* +* Determine if workspace is large enough for blocked code +* + IWS = N*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code +* + NBMIN = MAX( 2, ILAENV( 2, 'SGEHRD', ' ', N, ILO, IHI, + $ -1 ) ) + IF( LWORK.GE.N*NBMIN ) THEN + NB = LWORK / N + ELSE + NB = 1 + END IF + END IF + END IF + END IF + LDWORK = N +* + IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN +* +* Use unblocked code below +* + I = ILO +* + ELSE +* +* Use blocked code +* + DO 40 I = ILO, IHI - 1 - NX, NB + IB = MIN( NB, IHI-I ) +* +* Reduce columns i:i+ib-1 to Hessenberg form, returning the +* matrices V and T of the block reflector H = I - V*T*V' +* which performs the reduction, and also the matrix Y = A*V*T +* + CALL SLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT, + $ WORK, LDWORK ) +* +* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the +* right, computing A := A - Y * V'. 
V(i+ib,ib-1) must be set +* to 1 +* + EI = A( I+IB, I+IB-1 ) + A( I+IB, I+IB-1 ) = ONE + CALL SGEMM( 'No transpose', 'Transpose', + $ IHI, IHI-I-IB+1, + $ IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE, + $ A( 1, I+IB ), LDA ) + A( I+IB, I+IB-1 ) = EI +* +* Apply the block reflector H to A(1:i,i+1:i+ib-1) from the +* right +* + CALL STRMM( 'Right', 'Lower', 'Transpose', + $ 'Unit', I, IB-1, + $ ONE, A( I+1, I ), LDA, WORK, LDWORK ) + DO 30 J = 0, IB-2 + CALL SAXPY( I, -ONE, WORK( LDWORK*J+1 ), 1, + $ A( 1, I+J+1 ), 1 ) + 30 CONTINUE +* +* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the +* left +* + CALL SLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', + $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT, + $ A( I+1, I+IB ), LDA, WORK, LDWORK ) + 40 CONTINUE + END IF +* +* Use unblocked code to reduce the rest of the matrix +* + CALL SGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) + WORK( 1 ) = IWS +* + RETURN +* +* End of SGEHRD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgelq2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgelq2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,121 @@ + SUBROUTINE SGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGELQ2 computes an LQ factorization of a real m by n matrix A: +* A = L * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the m by n matrix A. 
+* On exit, the elements on and below the diagonal of the array +* contain the m by min(m,n) lower trapezoidal matrix L (L is +* lower triangular if m <= n); the elements above the diagonal, +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) REAL array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k) . . . H(2) H(1), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + REAL AII +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i,i+1:n) +* + CALL SLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAU( I ) ) + IF( I.LT.M ) THEN +* +* Apply H(i) to A(i+1:m,i:n) from the right +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), + $ A( I+1, I ), LDA, WORK ) + A( I, I ) = AII + END IF + 10 CONTINUE + RETURN +* +* End of SGELQ2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgelqf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgelqf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,195 @@ + SUBROUTINE SGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGELQF computes an LQ factorization of a real M-by-N matrix A: +* A = L * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the elements on and below the diagonal of the array +* contain the m-by-min(m,n) lower trapezoidal matrix L (L is +* lower triangular if m <= n); the elements above the diagonal, +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). 
+* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k) . . . H(2) H(1), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SGELQ2, SLARFB, SLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. 
.NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGELQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGELQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the LQ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL SGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i+ib:m,i:n) from the right +* + CALL SLARFB( 'Right', 'No transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. 
+* + IF( I.LE.K ) + $ CALL SGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of SGELQF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgelsd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgelsd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,538 @@ + SUBROUTINE SGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, + $ RANK, WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGELSD computes the minimum-norm solution to a real linear least +* squares problem: +* minimize 2-norm(| b - A*x |) +* using the singular value decomposition (SVD) of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The problem is solved in three steps: +* (1) Reduce the coefficient matrix A to bidiagonal form with +* Householder transformations, reducing the original problem +* into a "bidiagonal least squares problem" (BLS) +* (2) Solve the BLS using a divide and conquer approach. +* (3) Apply back all the Householder tranformations to solve +* the original least squares problem. +* +* The effective rank of A is determined by treating as zero those +* singular values which are less than RCOND times the largest singular +* value. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. 
It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of A. M >= 0. +* +* N (input) INTEGER +* The number of columns of A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, B is overwritten by the N-by-NRHS solution +* matrix X. If m >= n and RANK = n, the residual +* sum-of-squares for the solution in the i-th column is given +* by the sum of squares of elements n+1:m in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,max(M,N)). +* +* S (output) REAL array, dimension (min(M,N)) +* The singular values of A in decreasing order. +* The condition number of A in the 2-norm = S(1)/S(min(m,n)). +* +* RCOND (input) REAL +* RCOND is used to determine the effective rank of A. +* Singular values S(i) <= RCOND*S(1) are treated as zero. +* If RCOND < 0, machine precision is used instead. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the number of singular values +* which are greater than RCOND*S(1). +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK must be at least 1. +* The exact minimum amount of workspace needed depends on M, +* N and NRHS. 
As long as LWORK is at least +* 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, +* if M is greater than or equal to N or +* 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, +* if M is less than N, the code will execute correctly. +* SMLSIZ is returned by ILAENV and is equal to the maximum +* size of the subproblems at the bottom of the computation +* tree (usually about 25), and +* NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the array WORK and the +* minimum size of the array IWORK, and returns these values as +* the first entries of the WORK and IWORK arrays, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) +* LIWORK >= max(1, 3*MINMN*NLVL + 11*MINMN), +* where MINMN = MIN( M,N ). +* On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the algorithm for computing the SVD failed to converge; +* if INFO = i, i off-diagonal elements of an intermediate +* bidiagonal form did not converge to zero. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ, + $ LDWORK, LIWORK, MAXMN, MAXWRK, MINMN, MINWRK, + $ MM, MNTHR, NLVL, NWORK, SMLSIZ, WLALSD + REAL ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM +* .. +* .. External Subroutines .. 
+ EXTERNAL SGEBRD, SGELQF, SGEQRF, SLABAD, SLACPY, SLALSD, + $ SLASCL, SLASET, SORMBR, SORMLQ, SORMQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL SLAMCH, SLANGE, ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, LOG, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace. +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + LIWORK = 1 + IF( MINMN.GT.0 ) THEN + SMLSIZ = ILAENV( 9, 'SGELSD', ' ', 0, 0, 0, 0 ) + MNTHR = ILAENV( 6, 'SGELSD', ' ', M, N, NRHS, -1 ) + NLVL = MAX( INT( LOG( REAL( MINMN ) / REAL( SMLSIZ + 1 ) ) / + $ LOG( TWO ) ) + 1, 0 ) + LIWORK = 3*MINMN*NLVL + 11*MINMN + MM = M + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than +* columns. +* + MM = N + MAXWRK = MAX( MAXWRK, N + N*ILAENV( 1, 'SGEQRF', ' ', M, + $ N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, N + NRHS*ILAENV( 1, 'SORMQR', 'LT', + $ M, NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. 
+* + MAXWRK = MAX( MAXWRK, 3*N + ( MM + N )*ILAENV( 1, + $ 'SGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N + NRHS*ILAENV( 1, 'SORMBR', + $ 'QLT', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N + ( N - 1 )*ILAENV( 1, + $ 'SORMBR', 'PLN', N, NRHS, N, -1 ) ) + WLALSD = 9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + + $ ( SMLSIZ + 1 )**2 + MAXWRK = MAX( MAXWRK, 3*N + WLALSD ) + MINWRK = MAX( 3*N + MM, 3*N + NRHS, 3*N + WLALSD ) + END IF + IF( N.GT.M ) THEN + WLALSD = 9*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + + $ ( SMLSIZ + 1 )**2 + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows. +* + MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + 2*M*ILAENV( 1, + $ 'SGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + NRHS*ILAENV( 1, + $ 'SORMBR', 'QLT', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + ( M - 1 )*ILAENV( 1, + $ 'SORMBR', 'PLN', M, NRHS, M, -1 ) ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M + 2*M ) + END IF + MAXWRK = MAX( MAXWRK, M + NRHS*ILAENV( 1, 'SORMLQ', + $ 'LT', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + WLALSD ) + ELSE +* +* Path 2 - remaining underdetermined cases. +* + MAXWRK = 3*M + ( N + M )*ILAENV( 1, 'SGEBRD', ' ', M, + $ N, -1, -1 ) + MAXWRK = MAX( MAXWRK, 3*M + NRHS*ILAENV( 1, 'SORMBR', + $ 'QLT', M, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M + M*ILAENV( 1, 'SORMBR', + $ 'PLN', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M + WLALSD ) + END IF + MINWRK = MAX( 3*M + NRHS, 3*M + M, 3*M + WLALSD ) + END IF + END IF + MINWRK = MIN( MINWRK, MAXWRK ) + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWORK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters. +* + EPS = SLAMCH( 'P' ) + SFMIN = SLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max entry outside range [SMLNUM,BIGNUM]. +* + ANRM = SLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) + RANK = 0 + GO TO 10 + END IF +* +* Scale B if max entry outside range [SMLNUM,BIGNUM]. +* + BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* If M < N make sure certain entries of B are zero. +* + IF( M.LT.N ) + $ CALL SLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) +* +* Overdetermined case. +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns. +* + MM = N + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R. +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose(Q). 
+* (Workspace: need N+NRHS, prefer N+NRHS*NB) +* + CALL SORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Zero out below R. +* + IF( N.GT.1 ) THEN + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + END IF + END IF +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A. +* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) +* + CALL SGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R. +* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL SLALSD( 'U', SMLSIZ, N, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of R. +* + CALL SORMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M, WLALSD ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm. +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS, 4*M+M*LDA+WLALSD ) )LDWORK = LDA + ITAU = 1 + NWORK = M + 1 +* +* Compute A=L*Q. +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) + IL = NWORK +* +* Copy L to WORK(IL), zeroing out above its diagonal. 
+* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = IL + LDWORK*M + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL). +* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L. +* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL SLALSD( 'U', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of L. +* + CALL SORMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUP ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Zero out below first M rows of B. +* + CALL SLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) + NWORK = ITAU + M +* +* Multiply transpose(Q) by B. +* (Workspace: need M+NRHS, prefer M+NRHS*NB) +* + CALL SORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases. +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A. +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors. +* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. 
+* + CALL SLALSD( 'L', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of A. +* + CALL SORMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + END IF +* +* Undo scaling. +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 10 CONTINUE + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWORK + RETURN +* +* End of SGELSD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgelss.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgelss.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,617 @@ + SUBROUTINE SGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGELSS computes the minimum norm solution to a real linear least +* squares problem: +* +* Minimize 2-norm(| b - A*x |). +* +* using the singular value decomposition (SVD) of A. A is an M-by-N +* matrix which may be rank-deficient. 
+* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix +* X. +* +* The effective rank of A is determined by treating as zero those +* singular values which are less than RCOND times the largest singular +* value. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the first min(m,n) rows of A are overwritten with +* its right singular vectors, stored rowwise. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, B is overwritten by the N-by-NRHS solution +* matrix X. If m >= n and RANK = n, the residual +* sum-of-squares for the solution in the i-th column is given +* by the sum of squares of elements n+1:m in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,max(M,N)). +* +* S (output) REAL array, dimension (min(M,N)) +* The singular values of A in decreasing order. +* The condition number of A in the 2-norm = S(1)/S(min(m,n)). +* +* RCOND (input) REAL +* RCOND is used to determine the effective rank of A. +* Singular values S(i) <= RCOND*S(1) are treated as zero. +* If RCOND < 0, machine precision is used instead. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the number of singular values +* which are greater than RCOND*S(1). +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
+* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1, and also: +* LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS ) +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the algorithm for computing the SVD failed to converge; +* if INFO = i, i off-diagonal elements of an intermediate +* bidiagonal form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL, + $ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN, + $ MAXWRK, MINMN, MINWRK, MM, MNTHR + REAL ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR +* .. +* .. Local Arrays .. + REAL VDUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SBDSQR, SCOPY, SGEBRD, SGELQF, SGEMM, SGEMV, + $ SGEQRF, SLABAD, SLACPY, SLASCL, SLASET, SORGBR, + $ SORMBR, SORMLQ, SORMQR, SRSCL, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( MINMN.GT.0 ) THEN + MM = M + MNTHR = ILAENV( 6, 'SGELSS', ' ', M, N, NRHS, -1 ) + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than +* columns +* + MM = N + MAXWRK = MAX( MAXWRK, N + N*ILAENV( 1, 'SGEQRF', ' ', M, + $ N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, N + NRHS*ILAENV( 1, 'SORMQR', 'LT', + $ M, NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* +* Compute workspace needed for SBDSQR +* + BDSPAC = MAX( 1, 5*N ) + MAXWRK = MAX( MAXWRK, 3*N + ( MM + N )*ILAENV( 1, + $ 'SGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N + NRHS*ILAENV( 1, 'SORMBR', + $ 'QLT', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N + ( N - 1 )*ILAENV( 1, + $ 'SORGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + MINWRK = MAX( 3*N + MM, 3*N + NRHS, BDSPAC ) + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + IF( N.GT.M ) THEN +* +* Compute workspace needed for SBDSQR +* + BDSPAC = MAX( 1, 5*M ) + MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC ) + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows +* + MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + 2*M*ILAENV( 1, + $ 'SGEBRD', ' ', 
M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + NRHS*ILAENV( 1, + $ 'SORMBR', 'QLT', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + + $ ( M - 1 )*ILAENV( 1, 'SORGBR', 'P', M, + $ M, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + M + BDSPAC ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M + 2*M ) + END IF + MAXWRK = MAX( MAXWRK, M + NRHS*ILAENV( 1, 'SORMLQ', + $ 'LT', N, NRHS, M, -1 ) ) + ELSE +* +* Path 2 - underdetermined +* + MAXWRK = 3*M + ( N + M )*ILAENV( 1, 'SGEBRD', ' ', M, + $ N, -1, -1 ) + MAXWRK = MAX( MAXWRK, 3*M + NRHS*ILAENV( 1, 'SORMBR', + $ 'QLT', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M + M*ILAENV( 1, 'SORGBR', + $ 'P', M, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + END IF + END IF + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELSS', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + EPS = SLAMCH( 'P' ) + SFMIN = SLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. 
+* + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) + RANK = 0 + GO TO 70 + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Overdetermined case +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose(Q) +* (Workspace: need N+NRHS, prefer N+NRHS*NB) +* + CALL SORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Zero out below R +* + IF( N.GT.1 ) + $ CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + END IF +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) +* + CALL SGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R +* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK = IE + N 
+* +* Perform bidiagonal QR iteration +* multiply B by transpose of left singular vectors +* compute right singular vectors in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM, + $ 1, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 10 I = 1, N + IF( S( I ).GT.THR ) THEN + CALL SRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 10 CONTINUE +* +* Multiply B by right singular vectors +* (Workspace: need N, prefer N*NRHS) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO, + $ WORK, LDB ) + CALL SLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 20 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL SGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ), + $ LDB, ZERO, WORK, N ) + CALL SLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) + 20 CONTINUE + ELSE + CALL SGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) + CALL SCOPY( N, WORK, 1, B, 1 ) + END IF +* + ELSE IF( N.GE.MNTHR .AND. 
LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS ) )LDWORK = LDA + ITAU = 1 + IWORK = M + 1 +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) + IL = IWORK +* +* Copy L to WORK(IL), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = IL + LDWORK*M + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L +* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in WORK(IL) +* (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of L in WORK(IL) and +* multiplying B by transpose of left singular vectors +* (Workspace: need M*M+M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ), + $ LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 30 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL 
SRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 30 CONTINUE + IWORK = IE +* +* Multiply B by right singular vectors of L in WORK(IL) +* (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) +* + IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN + CALL SGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK, + $ B, LDB, ZERO, WORK( IWORK ), LDB ) + CALL SLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = ( LWORK-IWORK+1 ) / M + DO 40 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL SGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK, + $ B( 1, I ), LDB, ZERO, WORK( IWORK ), M ) + CALL SLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ), + $ LDB ) + 40 CONTINUE + ELSE + CALL SGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ), + $ 1, ZERO, WORK( IWORK ), 1 ) + CALL SCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) + END IF +* +* Zero out below first M rows of B +* + CALL SLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) + IWORK = ITAU + M +* +* Multiply transpose(Q) by B +* (Workspace: need M+NRHS, prefer M+NRHS*NB) +* + CALL SORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors +* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK 
= IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of A in A and +* multiplying B by transpose of left singular vectors +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM, + $ 1, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 50 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL SRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 50 CONTINUE +* +* Multiply B by right singular vectors of A +* (Workspace: need N, prefer N*NRHS) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL SGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO, + $ WORK, LDB ) + CALL SLACPY( 'F', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 60 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL SGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ), + $ LDB, ZERO, WORK, N ) + CALL SLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) + 60 CONTINUE + ELSE + CALL SGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) + CALL SCOPY( N, WORK, 1, B, 1 ) + END IF + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = MAXWRK + RETURN +* +* End of SGELSS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgelsy.f --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgelsy.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,391 @@ + SUBROUTINE SGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGELSY computes the minimum-norm solution to a real linear least +* squares problem: +* minimize || A * X - B || +* using a complete orthogonal factorization of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The routine first computes a QR factorization with column pivoting: +* A * P = Q * [ R11 R12 ] +* [ 0 R22 ] +* with R11 defined as the largest leading submatrix whose estimated +* condition number is less than 1/RCOND. The order of R11, RANK, +* is the effective rank of A. +* +* Then, R22 is considered to be negligible, and R12 is annihilated +* by orthogonal transformations from the right, arriving at the +* complete orthogonal factorization: +* A * P = Q * [ T11 0 ] * Z +* [ 0 0 ] +* The minimum-norm solution is then +* X = P * Z' [ inv(T11)*Q1'*B ] +* [ 0 ] +* where Q1 consists of the first RANK columns of Q. +* +* This routine is basically identical to the original xGELSX except +* three differences: +* o The call to the subroutine xGEQPF has been substituted by +* the call to the subroutine xGEQP3. This subroutine is a Blas-3 +* version of the QR factorization with column pivoting. +* o Matrix B (the right hand side) is updated with Blas-3.
+* o The permutation of matrix B (the right hand side) is faster and +* more simple. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of +* columns of matrices B and X. NRHS >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been overwritten by details of its +* complete orthogonal factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of AP, otherwise column i is a free column. +* On exit, if JPVT(i) = k, then the i-th column of AP +* was the k-th column of A. +* +* RCOND (input) REAL +* RCOND is used to determine the effective rank of A, which +* is defined as the order of the largest leading triangular +* submatrix R11 in the QR factorization with pivoting of A, +* whose estimated condition number < 1/RCOND. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the order of the submatrix +* R11. This is the same as the order of the submatrix T11 +* in the complete orthogonal factorization of A. +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* The unblocked strategy requires that: +* LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), +* where MN = min( M, N ). 
+* The block algorithm requires that: +* LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), +* where NB is an upper bound on the blocksize returned +* by ILAENV for the routines SGEQP3, STZRZF, STZRQF, SORMQR, +* and SORMRZ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: If INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKMIN, + $ LWKOPT, MN, NB, NB1, NB2, NB3, NB4 + REAL ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX, + $ SMAXPR, SMIN, SMINPR, SMLNUM, WSIZE +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL ILAENV, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEQP3, SLABAD, SLAIC1, SLASCL, SLASET, + $ SORMQR, SORMRZ, STRSM, STZRZF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. 
+* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + END IF +* +* Figure out optimal block size +* + IF( INFO.EQ.0 ) THEN + IF( MN.EQ.0 .OR. NRHS.EQ.0 ) THEN + LWKMIN = 1 + LWKOPT = 1 + ELSE + NB1 = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'SORMQR', ' ', M, N, NRHS, -1 ) + NB4 = ILAENV( 1, 'SORMRQ', ' ', M, N, NRHS, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKMIN = MN + MAX( 2*MN, N + 1, MN + NRHS ) + LWKOPT = MAX( LWKMIN, + $ MN + 2*N + NB*( N + 1 ), 2*MN + NB*NRHS ) + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELSY', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MN.EQ.0 .OR. NRHS.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max entries outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + RANK = 0 + GO TO 70 + END IF +* + BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. 
BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL SGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), + $ LWORK-MN, INFO ) + WSIZE = MN + WORK( MN+1 ) +* +* workspace: MN+2*N+NB*(N+1). +* Details of Householder rotations stored in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = ONE + WORK( ISMAX ) = ONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + GO TO 70 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL SLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL SLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* workspace: 3*MN. +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL STZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) +* +* workspace: 2*MN. +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) +* + CALL SORMQR( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ), + $ B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + WSIZE = MAX( WSIZE, 2*MN+WORK( 2*MN+1 ) ) +* +* workspace: 2*MN+NB*NRHS. 
+* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, ONE, A, LDA, B, LDB ) +* + DO 40 J = 1, NRHS + DO 30 I = RANK + 1, N + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + CALL SORMRZ( 'Left', 'Transpose', N, NRHS, RANK, N-RANK, A, + $ LDA, WORK( MN+1 ), B, LDB, WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) + END IF +* +* workspace: 2*MN+NRHS. +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 60 J = 1, NRHS + DO 50 I = 1, N + WORK( JPVT( I ) ) = B( I, J ) + 50 CONTINUE + CALL SCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 ) + 60 CONTINUE +* +* workspace: N. +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SGELSY +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgeqp3.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgeqp3.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,284 @@ + SUBROUTINE SGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEQP3 computes a QR factorization with column pivoting of a +* matrix A: A*P = Q*R using Level 3 BLAS. 
+* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of the array contains the +* min(M,N)-by-N upper trapezoidal matrix R; the elements below +* the diagonal, together with the array TAU, represent the +* orthogonal matrix Q as a product of min(M,N) elementary +* reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(J).ne.0, the J-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(J)=0, +* the J-th column of A is a free column. +* On exit, if JPVT(J)=K, then the J-th column of A*P was the +* the K-th column of A. +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO=0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 3*N+1. +* For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). 
+* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real/complex scalar, and v is a real/complex vector +* with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in +* A(i+1:m,i), and tau in TAU(i). +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER INB, INBMIN, IXOVER + PARAMETER ( INB = 1, INBMIN = 2, IXOVER = 3 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB, + $ NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SLAQP2, SLAQPS, SORMQR, SSWAP, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SNRM2 + EXTERNAL ILAENV, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + IWS = 1 + LWKOPT = 1 + ELSE + IWS = 3*N + 1 + NB = ILAENV( INB, 'SGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = 2*N + ( N + 1 )*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQP3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( MINMN.EQ.0 ) THEN + RETURN + END IF +* +* Move initial columns up front. 
+* + NFXD = 1 + DO 10 J = 1, N + IF( JPVT( J ).NE.0 ) THEN + IF( J.NE.NFXD ) THEN + CALL SSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 ) + JPVT( J ) = JPVT( NFXD ) + JPVT( NFXD ) = J + ELSE + JPVT( J ) = J + END IF + NFXD = NFXD + 1 + ELSE + JPVT( J ) = J + END IF + 10 CONTINUE + NFXD = NFXD - 1 +* +* Factorize fixed columns +* ======================= +* +* Compute the QR factorization of fixed columns and update +* remaining columns. +* + IF( NFXD.GT.0 ) THEN + NA = MIN( M, NFXD ) +*CC CALL SGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) + CALL SGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + IF( NA.LT.N ) THEN +*CC CALL SORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA, +*CC $ TAU, A( 1, NA+1 ), LDA, WORK, INFO ) + CALL SORMQR( 'Left', 'Transpose', M, N-NA, NA, A, LDA, TAU, + $ A( 1, NA+1 ), LDA, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + END IF + END IF +* +* Factorize free columns +* ====================== +* + IF( NFXD.LT.MINMN ) THEN +* + SM = M - NFXD + SN = N - NFXD + SMINMN = MINMN - NFXD +* +* Determine the block size. +* + NB = ILAENV( INB, 'SGEQRF', ' ', SM, SN, -1, -1 ) + NBMIN = 2 + NX = 0 +* + IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( IXOVER, 'SGEQRF', ' ', SM, SN, -1, + $ -1 ) ) +* +* + IF( NX.LT.SMINMN ) THEN +* +* Determine if workspace is large enough for blocked code. +* + MINWS = 2*SN + ( SN+1 )*NB + IWS = MAX( IWS, MINWS ) + IF( LWORK.LT.MINWS ) THEN +* +* Not enough workspace to use optimal NB: Reduce NB and +* determine the minimum value of NB. +* + NB = ( LWORK-2*SN ) / ( SN+1 ) + NBMIN = MAX( 2, ILAENV( INBMIN, 'SGEQRF', ' ', SM, SN, + $ -1, -1 ) ) +* +* + END IF + END IF + END IF +* +* Initialize partial column norms. The first N elements of work +* store the exact column norms. 
+* + DO 20 J = NFXD + 1, N + WORK( J ) = SNRM2( SM, A( NFXD+1, J ), 1 ) + WORK( N+J ) = WORK( J ) + 20 CONTINUE +* + IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND. + $ ( NX.LT.SMINMN ) ) THEN +* +* Use blocked code initially. +* + J = NFXD + 1 +* +* Compute factorization: while loop. +* +* + TOPBMN = MINMN - NX + 30 CONTINUE + IF( J.LE.TOPBMN ) THEN + JB = MIN( NB, TOPBMN-J+1 ) +* +* Factorize JB columns among columns J:N. +* + CALL SLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA, + $ JPVT( J ), TAU( J ), WORK( J ), WORK( N+J ), + $ WORK( 2*N+1 ), WORK( 2*N+JB+1 ), N-J+1 ) +* + J = J + FJB + GO TO 30 + END IF + ELSE + J = NFXD + 1 + END IF +* +* Use unblocked code to factor the last or only block. +* +* + IF( J.LE.MINMN ) + $ CALL SLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ), + $ TAU( J ), WORK( J ), WORK( N+J ), + $ WORK( 2*N+1 ) ) +* + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of SGEQP3 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgeqpf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgeqpf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,231 @@ + SUBROUTINE SGEQPF( M, N, A, LDA, JPVT, TAU, WORK, INFO ) +* +* -- LAPACK deprecated driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine SGEQP3. +* +* SGEQPF computes a QR factorization with column pivoting of a +* real M-by-N matrix A: A*P = Q*R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0 +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. 
+* On exit, the upper triangle of the array contains the +* min(M,N)-by-N upper triangular matrix R; the elements +* below the diagonal, together with the array TAU, +* represent the orthogonal matrix Q as a product of +* min(m,n) elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(i) = 0, +* the i-th column of A is a free column. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace) REAL array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(n) +* +* Each H(i) has the form +* +* H = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i). +* +* The matrix P is represented in jpvt as follows: If +* jpvt(j) = i +* then the jth column of P is the ith canonical unit vector. +* +* Partial column norm updating strategy modified by +* Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +* University of Zagreb, Croatia. +* June 2006. +* For more details see LAPACK Working Note 176. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MA, MN, PVT + REAL AII, TEMP, TEMP2, TOL3Z +* .. +* .. External Subroutines .. + EXTERNAL SGEQR2, SLARF, SLARFG, SORM2R, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. 
+ INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SNRM2 + EXTERNAL ISAMAX, SLAMCH, SNRM2 +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQPF', -INFO ) + RETURN + END IF +* + MN = MIN( M, N ) + TOL3Z = SQRT(SLAMCH('Epsilon')) +* +* Move initial columns up front +* + ITEMP = 1 + DO 10 I = 1, N + IF( JPVT( I ).NE.0 ) THEN + IF( I.NE.ITEMP ) THEN + CALL SSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 ) + JPVT( I ) = JPVT( ITEMP ) + JPVT( ITEMP ) = I + ELSE + JPVT( I ) = I + END IF + ITEMP = ITEMP + 1 + ELSE + JPVT( I ) = I + END IF + 10 CONTINUE + ITEMP = ITEMP - 1 +* +* Compute the QR factorization and update remaining columns +* + IF( ITEMP.GT.0 ) THEN + MA = MIN( ITEMP, M ) + CALL SGEQR2( M, MA, A, LDA, TAU, WORK, INFO ) + IF( MA.LT.N ) THEN + CALL SORM2R( 'Left', 'Transpose', M, N-MA, MA, A, LDA, TAU, + $ A( 1, MA+1 ), LDA, WORK, INFO ) + END IF + END IF +* + IF( ITEMP.LT.MN ) THEN +* +* Initialize partial column norms. The first n elements of +* work store the exact column norms. 
+* + DO 20 I = ITEMP + 1, N + WORK( I ) = SNRM2( M-ITEMP, A( ITEMP+1, I ), 1 ) + WORK( N+I ) = WORK( I ) + 20 CONTINUE +* +* Compute factorization +* + DO 40 I = ITEMP + 1, MN +* +* Determine ith pivot column and swap if necessary +* + PVT = ( I-1 ) + ISAMAX( N-I+1, WORK( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL SSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + WORK( PVT ) = WORK( I ) + WORK( N+PVT ) = WORK( N+I ) + END IF +* +* Generate elementary reflector H(i) +* + IF( I.LT.M ) THEN + CALL SLARFG( M-I+1, A( I, I ), A( I+1, I ), 1, TAU( I ) ) + ELSE + CALL SLARFG( 1, A( M, M ), A( M, M ), 1, TAU( M ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( 'LEFT', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK( 2*N+1 ) ) + A( I, I ) = AII + END IF +* +* Update partial column norms +* + DO 30 J = I + 1, N + IF( WORK( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ABS( A( I, J ) ) / WORK( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = TEMP*( WORK( J ) / WORK( N+J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + IF( M-I.GT.0 ) THEN + WORK( J ) = SNRM2( M-I, A( I+1, J ), 1 ) + WORK( N+J ) = WORK( J ) + ELSE + WORK( J ) = ZERO + WORK( N+J ) = ZERO + END IF + ELSE + WORK( J ) = WORK( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE +* + 40 CONTINUE + END IF + RETURN +* +* End of SGEQPF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgeqr2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgeqr2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,121 @@ + SUBROUTINE SGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. 
+ REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEQR2 computes a QR factorization of a real m by n matrix A: +* A = Q * R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(m,n) by n upper trapezoidal matrix R (R is +* upper triangular if m >= n); the elements below the diagonal, +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + REAL AII +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQR2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL SLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + A( I, I ) = AII + END IF + 10 CONTINUE + RETURN +* +* End of SGEQR2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgeqrf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgeqrf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,196 @@ + SUBROUTINE SGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEQRF computes a QR factorization of a real M-by-N matrix A: +* A = Q * R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(M,N)-by-N upper trapezoidal matrix R (R is +* upper triangular if m >= n); the elements below the diagonal, +* with the array TAU, represent the orthogonal matrix Q as a +* product of min(m,n) elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). 
+* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SGEQR2, SLARFB, SLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. 
.NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL SGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(i:m,i+ib:n) from the left +* + CALL SLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. 
+* + IF( I.LE.K ) + $ CALL SGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of SGEQRF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgesv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgesv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,107 @@ + SUBROUTINE SGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SGESV computes the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +* +* The LU decomposition with partial pivoting and row interchanges is +* used to factor A as +* A = P * L * U, +* where P is a permutation matrix, L is unit lower triangular, and U is +* upper triangular. The factored form of A is then used to solve the +* system of equations A * X = B. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the N-by-N coefficient matrix A. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices that define the permutation matrix P; +* row i of the matrix was interchanged with row IPIV(i). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS matrix of right hand side matrix B. 
+* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, so the solution could not be computed. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL SGETRF, SGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGESV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of A. +* + CALL SGETRF( N, N, A, LDA, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) + END IF + RETURN +* +* End of SGESV +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgesvd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgesvd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,3402 @@ + SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), S( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. 
+* +* Purpose +* ======= +* +* SGESVD computes the singular value decomposition (SVD) of a real +* M-by-N matrix A, optionally computing the left and/or right singular +* vectors. The SVD is written +* +* A = U * SIGMA * transpose(V) +* +* where SIGMA is an M-by-N matrix which is zero except for its +* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and +* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA +* are the singular values of A; they are real and non-negative, and +* are returned in descending order. The first min(m,n) columns of +* U and V are the left and right singular vectors of A. +* +* Note that the routine returns V**T, not V. +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix U: +* = 'A': all M columns of U are returned in array U: +* = 'S': the first min(m,n) columns of U (the left singular +* vectors) are returned in the array U; +* = 'O': the first min(m,n) columns of U (the left singular +* vectors) are overwritten on the array A; +* = 'N': no columns of U (no left singular vectors) are +* computed. +* +* JOBVT (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix +* V**T: +* = 'A': all N rows of V**T are returned in the array VT; +* = 'S': the first min(m,n) rows of V**T (the right singular +* vectors) are returned in the array VT; +* = 'O': the first min(m,n) rows of V**T (the right singular +* vectors) are overwritten on the array A; +* = 'N': no rows of V**T (no right singular vectors) are +* computed. +* +* JOBVT and JOBU cannot both be 'O'. +* +* M (input) INTEGER +* The number of rows of the input matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the input matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. 
+* On exit, +* if JOBU = 'O', A is overwritten with the first min(m,n) +* columns of U (the left singular vectors, +* stored columnwise); +* if JOBVT = 'O', A is overwritten with the first min(m,n) +* rows of V**T (the right singular vectors, +* stored rowwise); +* if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A +* are destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* S (output) REAL array, dimension (min(M,N)) +* The singular values of A, sorted so that S(i) >= S(i+1). +* +* U (output) REAL array, dimension (LDU,UCOL) +* (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. +* If JOBU = 'A', U contains the M-by-M orthogonal matrix U; +* if JOBU = 'S', U contains the first min(m,n) columns of U +* (the left singular vectors, stored columnwise); +* if JOBU = 'N' or 'O', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= 1; if +* JOBU = 'S' or 'A', LDU >= M. +* +* VT (output) REAL array, dimension (LDVT,N) +* If JOBVT = 'A', VT contains the N-by-N orthogonal matrix +* V**T; +* if JOBVT = 'S', VT contains the first min(m,n) rows of +* V**T (the right singular vectors, stored rowwise); +* if JOBVT = 'N' or 'O', VT is not referenced. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= 1; if +* JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK; +* if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged +* superdiagonal elements of an upper bidiagonal matrix B +* whose diagonal is in S (not necessarily sorted). B +* satisfies A = U * B * VT, so it has the same singular values +* as A, and singular vectors related by U and VT. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)). +* For good performance, LWORK should generally be larger. 
+* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if SBDSQR did not converge, INFO specifies how many +* superdiagonals of an intermediate bidiagonal form B +* did not converge to zero. See the description of WORK +* above for details. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS, + $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS + INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL, + $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU, + $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU, + $ NRVT, WRKBL + REAL ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SBDSQR, SGEBRD, SGELQF, SGEMM, SGEQRF, SLACPY, + $ SLASCL, SLASET, SORGBR, SORGLQ, SORGQR, SORMBR, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + WNTUA = LSAME( JOBU, 'A' ) + WNTUS = LSAME( JOBU, 'S' ) + WNTUAS = WNTUA .OR. WNTUS + WNTUO = LSAME( JOBU, 'O' ) + WNTUN = LSAME( JOBU, 'N' ) + WNTVA = LSAME( JOBVT, 'A' ) + WNTVS = LSAME( JOBVT, 'S' ) + WNTVAS = WNTVA .OR. WNTVS + WNTVO = LSAME( JOBVT, 'O' ) + WNTVN = LSAME( JOBVT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR. 
+ $ ( WNTVO .AND. WNTUO ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN + INFO = -9 + ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR. + $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( M.GE.N .AND. MINMN.GT.0 ) THEN +* +* Compute space needed for SBDSQR +* + MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 ) + BDSPAC = 5*N + IF( M.GE.MNTHR ) THEN + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* + MAXWRK = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + IF( WNTVO .OR. WNTVAS ) + $ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 4*N, BDSPAC ) + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N ) + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUO .AND. 
WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N ) + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUS .AND. WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUS .AND. WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUS .AND. 
WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUA .AND. WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUA .AND. WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUA .AND. 
WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + END IF + ELSE +* +* Path 10 (M at least N, but not much larger) +* + MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N, + $ -1, -1 ) + IF( WNTUS .OR. WNTUO ) + $ MAXWRK = MAX( MAXWRK, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', M, N, N, -1 ) ) + IF( WNTUA ) + $ MAXWRK = MAX( MAXWRK, 3*N+M* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, N, -1 ) ) + IF( .NOT.WNTVN ) + $ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 3*N+M, BDSPAC ) + END IF + ELSE IF( MINMN.GT.0 ) THEN +* +* Compute space needed for SBDSQR +* + MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 ) + BDSPAC = 5*M + IF( N.GE.MNTHR ) THEN + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* + MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + IF( WNTUO .OR. WNTUAS ) + $ MAXWRK = MAX( MAXWRK, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 4*M, BDSPAC ) + ELSE IF( WNTVO .AND. 
WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M ) + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', +* JOBVT='O') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M ) + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVS .AND. WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVS .AND. 
WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVS .AND. WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVA .AND. WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVA .AND. 
WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVA .AND. WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + END IF + ELSE +* +* Path 10t(N greater than M, but not much larger) +* + MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N, + $ -1, -1 ) + IF( WNTVS .OR. WNTVO ) + $ MAXWRK = MAX( MAXWRK, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'P', M, N, M, -1 ) ) + IF( WNTVA ) + $ MAXWRK = MAX( MAXWRK, 3*M+N* + $ ILAENV( 1, 'SORGBR', 'P', N, N, M, -1 ) ) + IF( .NOT.WNTUN ) + $ MAXWRK = MAX( MAXWRK, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 3*M+N, BDSPAC ) + END IF + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGESVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* No left singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out below R +* + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + NCVT = 0 + IF( WNTVO .OR. WNTVAS ) THEN +* +* If right singular vectors desired, generate P'. 
+* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + NCVT = N + END IF + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A if desired +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA, + $ DUM, 1, DUM, 1, WORK( IWORK ), INFO ) +* +* If right singular vectors desired in VT, copy them there +* + IF( WNTVAS ) + $ CALL SLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* N left singular vectors to be overwritten on A and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N, WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N-N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR) and zero out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ 
WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1, + $ WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + N +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (Workspace: need N*N+2*N, prefer N*N+M*N+N) +* + DO 10 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, ZERO, + $ WORK( IU ), LDWRKU ) + CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing A +* (Workspace: need 4*N, prefer 3*N+N*NB) +* + CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1, + $ A, LDA, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO .AND. 
WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N and WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N-N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT, copying result to WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE 
+ N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) and computing right +* singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT, + $ WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + N +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (Workspace: need N*N+2*N, prefer N*N+M*N+N) +* + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, ZERO, + $ WORK( IU ), LDWRKU ) + CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) +* +* Generate Q in A +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in A by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* 
Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT, + $ A, LDA, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUS ) THEN +* + IF( WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* N left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, + $ 1, WORK( IR ), LDWRKR, DUM, 1, + $ 
WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IR ), LDWRKR, ZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, + $ 1, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* N left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is 
LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*N*N+4*N, +* prefer 2*N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*N*N+4*N-1, +* prefer 2*N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (Workspace: need 2*N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing 
result in U +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IU ), LDWRKU, ZERO, U, LDU ) +* +* Copy right singular vectors of R to A +* (Workspace: need N*N) +* + CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A, + $ LDA, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' +* or 'A') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( 
4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need N*N+4*N-1, +* prefer N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 
'N', M, N, N, ONE, A, LDA, + $ WORK( IU ), LDWRKU, ZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTUA ) THEN +* + IF( WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* M left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + 
IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in U +* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, + $ 1, WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IR), storing result in A +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IR ), LDWRKR, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL SLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( 
M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, + $ 1, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* M left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), 
LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*N*N+4*N, +* prefer 2*N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*N*N+4*N-1, +* prefer 2*N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (Workspace: need 2*N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IU ), LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL SLACPY( 'F', M, N, A, LDA, U, LDU ) +* +* Copy right singular vectors of R from WORK(IR) to A +* + CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + 
$ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A, + $ LDA, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' +* or 'A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N 
+ IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need N*N+4*N-1, +* prefer N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IU ), LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL SLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* 
Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R from A to VT, zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* M .LT. 
MNTHR +* +* Path 10 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) +* + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) + IF( WNTUS ) + $ NCU = N + IF( WNTUA ) + $ NCU = M + CALL SORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (Workspace: need 4*N, prefer 3*N+N*NB) +* + CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + N + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. 
( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* No right singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out above L +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUO .OR. 
WNTUAS ) THEN +* +* If left singular vectors desired, generate Q +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + M + NRU = 0 + IF( WNTUO .OR. WNTUAS ) + $ NRU = M +* +* Perform bidiagonal QR iteration, computing left singular +* vectors of A in A if desired +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A, + $ LDA, DUM, 1, WORK( IWORK ), INFO ) +* +* If left singular vectors desired in U, copy them there +* + IF( WNTUAS ) + $ CALL SLACPY( 'F', M, M, A, LDA, U, LDU ) +* + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* M right singular vectors to be overwritten on A and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M-M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR) and zero out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* 
(Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L +* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + M +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (Workspace: need M*M+2*M, prefer M*M+M*N+M) +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, ZERO, + $ WORK( IU ), LDWRKU ) + CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA, + $ DUM, 1, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVO .AND. 
WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M-M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing about above it +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U, copying result to WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR ) +* +* Generate right vectors bidiagonalizing L in WORK(IR) +* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + 
IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U, and computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + M +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (Workspace: need M*M+2*M, prefer M*M+M*N+M)) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, ZERO, + $ WORK( IU ), LDWRKU ) + CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in A +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, 
computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVS ) THEN +* + IF( WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* M right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L in +* WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) 
+* +* Multiply right singular vectors of L in WORK(IR) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ), + $ LDWRKR, A, LDA, ZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy result to VT +* + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT, + $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) 
is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out below it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*M*M+4*M, +* prefer 2*M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*M*M+4*M-1, +* prefer 2*M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (Workspace: need 2*M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in 
A, storing result in VT +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, A, LDA, ZERO, VT, LDVT ) +* +* Copy left singular vectors of L to A +* (Workspace: need M*M) +* + CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors of L in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, compute left +* singular vectors of A in A and compute right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( 
LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is LDA by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need M*M+4*M-1, +* prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + 
CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, A, LDA, ZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTVA ) THEN +* + IF( WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* N right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + 
IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in VT +* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need M*M+4*M-1, +* prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ), + $ LDWRKR, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer 
M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT, + $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL SGELQF( M, N, A, 
LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*M*M+4*M, +* prefer 2*M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*M*M+4*M-1, +* prefer 2*M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (Workspace: need 2*M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* +* Copy left singular vectors of A from WORK(IR) to A 
+* + CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by M +* + LDWRKU = LDA + ELSE +* +* 
WORK(IU) is M by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from 
A to VT +* + CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N .LT. 
MNTHR +* +* Path 10t(N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) +* + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) + IF( WNTVA ) + $ NRVT = N + IF( WNTVS ) + $ NRVT = M + CALL SORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) +* + CALL SORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + M + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. 
( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO ) + END IF +* + END IF +* + END IF +* +* If SBDSQR failed to converge, copy unconverged superdiagonals +* to WORK( 2:MINMN ) +* + IF( INFO.NE.0 ) THEN + IF( IE.GT.2 ) THEN + DO 50 I = 1, MINMN - 1 + WORK( I+1 ) = WORK( I+IE-1 ) + 50 CONTINUE + END IF + IF( IE.LT.2 ) THEN + DO 60 I = MINMN - 1, 1, -1 + WORK( I+1 ) = WORK( I+IE-1 ) + 60 CONTINUE + END IF + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ), + $ MINMN, IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. 
ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ), + $ MINMN, IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of SGESVD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgetf2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgetf2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,147 @@ + SUBROUTINE SGETF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SGETF2 computes an LU factorization of a general m-by-n matrix A +* using partial pivoting with row interchanges. +* +* The factorization has the form +* A = P * L * U +* where P is a permutation matrix, L is lower triangular with unit +* diagonal elements (lower trapezoidal if m > n), and U is upper +* triangular (upper trapezoidal if m < n). +* +* This is the right-looking Level 2 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the m by n matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, U(k,k) is exactly zero. 
The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + REAL SFMIN + INTEGER I, J, JP +* .. +* .. External Functions .. + REAL SLAMCH + INTEGER ISAMAX + EXTERNAL SLAMCH, ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SGER, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Compute machine safe minimum +* + SFMIN = SLAMCH('S') +* + DO 10 J = 1, MIN( M, N ) +* +* Find pivot and test for singularity. +* + JP = J - 1 + ISAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + IF( A( JP, J ).NE.ZERO ) THEN +* +* Apply the interchange to columns 1:N. +* + IF( JP.NE.J ) + $ CALL SSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA ) +* +* Compute elements J+1:M of J-th column. +* + IF( J.LT.M ) THEN + IF( ABS(A( J, J )) .GE. SFMIN ) THEN + CALL SSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) + ELSE + DO 20 I = 1, M-J + A( J+I, J ) = A( J+I, J ) / A( J, J ) + 20 CONTINUE + END IF + END IF +* + ELSE IF( INFO.EQ.0 ) THEN +* + INFO = J + END IF +* + IF( J.LT.MIN( M, N ) ) THEN +* +* Update trailing submatrix. 
+* + CALL SGER( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), LDA, + $ A( J+1, J+1 ), LDA ) + END IF + 10 CONTINUE + RETURN +* +* End of SGETF2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgetrf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgetrf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,159 @@ + SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SGETRF computes an LU factorization of a general M-by-N matrix A +* using partial pivoting with row interchanges. +* +* The factorization has the form +* A = P * L * U +* where P is a permutation matrix, L is lower triangular with unit +* diagonal elements (lower trapezoidal if m > n), and U is upper +* triangular (upper trapezoidal if m < n). +* +* This is the right-looking Level 3 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. 
The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SGETF2, SLASWP, STRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'SGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL SGETF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL SGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to columns 1:J-1. +* + CALL SLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF( J+JB.LE.N ) THEN +* +* Apply interchanges to columns J+JB:N. +* + CALL SLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* +* Compute block row of U. 
+* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, + $ N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ), + $ LDA ) + IF( J+JB.LE.M ) THEN +* +* Update trailing submatrix. +* + CALL SGEMM( 'No transpose', 'No transpose', M-J-JB+1, + $ N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA, + $ A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ), + $ LDA ) + END IF + END IF + 20 CONTINUE + END IF + RETURN +* +* End of SGETRF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgetri.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgetri.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,192 @@ + SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGETRI computes the inverse of a matrix using the LU factorization +* computed by SGETRF. +* +* This method inverts U and then computes inv(A) by solving the system +* inv(A)*L = inv(U) for inv(A). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the factors L and U from the factorization +* A = P*L*U as computed by SGETRF. +* On exit, if INFO = 0, the inverse of the original matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from SGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). 
+* For optimal performance LWORK >= N*NB, where NB is +* the optimal blocksize returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is +* singular and its inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, + $ NBMIN, NN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SGEMV, SSWAP, STRSM, STRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'SGETRI', ' ', N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETRI', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form inv(U). If INFO > 0 from STRTRI, then U is singular, +* and the inverse is not computed. +* + CALL STRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. 
NB.LT.N ) THEN + IWS = MAX( LDWORK*NB, 1 ) + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGETRI', ' ', N, -1, -1, -1 ) ) + END IF + ELSE + IWS = N + END IF +* +* Solve the equation inv(A)*L = inv(U) for inv(A). +* + IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + DO 20 J = N, 1, -1 +* +* Copy current column of L to WORK and replace with zeros. +* + DO 10 I = J + 1, N + WORK( I ) = A( I, J ) + A( I, J ) = ZERO + 10 CONTINUE +* +* Compute current column of inv(A). +* + IF( J.LT.N ) + $ CALL SGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), + $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) + 20 CONTINUE + ELSE +* +* Use blocked code. +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 50 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) +* +* Copy current block column of L to WORK and replace with +* zeros. +* + DO 40 JJ = J, J + JB - 1 + DO 30 I = JJ + 1, N + WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) + A( I, JJ ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Compute current block column of inv(A). +* + IF( J+JB.LE.N ) + $ CALL SGEMM( 'No transpose', 'No transpose', N, JB, + $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, + $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) + CALL STRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, + $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) + 50 CONTINUE + END IF +* +* Apply column interchanges. +* + DO 60 J = N - 1, 1, -1 + JP = IPIV( J ) + IF( JP.NE.J ) + $ CALL SSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) + 60 CONTINUE +* + WORK( 1 ) = IWS + RETURN +* +* End of SGETRI +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgetrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgetrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,149 @@ + SUBROUTINE SGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, N, NRHS +* .. 
+* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SGETRS solves a system of linear equations +* A * X = B or A' * X = B +* with a general N-by-N matrix A using the LU factorization computed +* by SGETRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A'* X = B (Transpose) +* = 'C': A'* X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by SGETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from SGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLASWP, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. 
+ $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( NOTRAN ) THEN +* +* Solve A * X = B. +* +* Apply row interchanges to the right hand sides. +* + CALL SLASWP( NRHS, B, LDB, 1, N, IPIV, 1 ) +* +* Solve L*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A' * X = B. +* +* Solve U'*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Lower', 'Transpose', 'Unit', N, NRHS, ONE, + $ A, LDA, B, LDB ) +* +* Apply row interchanges to the solution vectors. +* + CALL SLASWP( NRHS, B, LDB, 1, N, IPIV, -1 ) + END IF +* + RETURN +* +* End of SGETRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sggbak.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sggbak.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,220 @@ + SUBROUTINE SGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, + $ LDV, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + REAL LSCALE( * ), RSCALE( * ), V( LDV, * ) +* .. 
+* +* Purpose +* ======= +* +* SGGBAK forms the right or left eigenvectors of a real generalized +* eigenvalue problem A*x = lambda*B*x, by backward transformation on +* the computed eigenvectors of the balanced pair of matrices output by +* SGGBAL. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the type of backward transformation required: +* = 'N': do nothing, return immediately; +* = 'P': do backward transformation for permutation only; +* = 'S': do backward transformation for scaling only; +* = 'B': do backward transformations for both permutation and +* scaling. +* JOB must be the same as the argument JOB supplied to SGGBAL. +* +* SIDE (input) CHARACTER*1 +* = 'R': V contains right eigenvectors; +* = 'L': V contains left eigenvectors. +* +* N (input) INTEGER +* The number of rows of the matrix V. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* The integers ILO and IHI determined by SGGBAL. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* LSCALE (input) REAL array, dimension (N) +* Details of the permutations and/or scaling factors applied +* to the left side of A and B, as returned by SGGBAL. +* +* RSCALE (input) REAL array, dimension (N) +* Details of the permutations and/or scaling factors applied +* to the right side of A and B, as returned by SGGBAL. +* +* M (input) INTEGER +* The number of columns of the matrix V. M >= 0. +* +* V (input/output) REAL array, dimension (LDV,M) +* On entry, the matrix of right or left eigenvectors to be +* transformed, as returned by STGEVC. +* On exit, V is overwritten by the transformed eigenvectors. +* +* LDV (input) INTEGER +* The leading dimension of the matrix V. LDV >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* See R.C. Ward, Balancing the generalized eigenvalue problem, +* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. 
+* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( N.EQ.0 .AND. IHI.EQ.0 .AND. ILO.NE.1 ) THEN + INFO = -4 + ELSE IF( N.GT.0 .AND. ( IHI.LT.ILO .OR. IHI.GT.MAX( 1, N ) ) ) + $ THEN + INFO = -5 + ELSE IF( N.EQ.0 .AND. ILO.EQ.1 .AND. IHI.NE.0 ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -8 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward transformation on right eigenvectors +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + CALL SSCAL( M, RSCALE( I ), V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* +* Backward transformation on left eigenvectors +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + CALL SSCAL( M, LSCALE( I ), V( I, 1 ), LDV ) + 20 CONTINUE + END IF + END IF +* +* Backward permutation +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. 
LSAME( JOB, 'B' ) ) THEN +* +* Backward permutation on right eigenvectors +* + IF( RIGHTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 50 +* + DO 40 I = ILO - 1, 1, -1 + K = RSCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE +* + 50 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 70 + DO 60 I = IHI + 1, N + K = RSCALE( I ) + IF( K.EQ.I ) + $ GO TO 60 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 60 CONTINUE + END IF +* +* Backward permutation on left eigenvectors +* + 70 CONTINUE + IF( LEFTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 90 + DO 80 I = ILO - 1, 1, -1 + K = LSCALE( I ) + IF( K.EQ.I ) + $ GO TO 80 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 80 CONTINUE +* + 90 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 110 + DO 100 I = IHI + 1, N + K = LSCALE( I ) + IF( K.EQ.I ) + $ GO TO 100 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 100 CONTINUE + END IF + END IF +* + 110 CONTINUE +* + RETURN +* +* End of SGGBAK +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sggbal.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sggbal.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,469 @@ + SUBROUTINE SGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, + $ RSCALE, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, LDB, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), LSCALE( * ), + $ RSCALE( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGGBAL balances a pair of general real matrices (A,B). This +* involves, first, permuting A and B by similarity transformations to +* isolate eigenvalues in the first 1 to ILO-1 and last IHI+1 to N +* elements on the diagonal; and second, applying a diagonal similarity +* transformation to rows and columns ILO to IHI to make the rows +* and columns as close in norm as possible. Both steps are optional.
+* +* Balancing may reduce the 1-norm of the matrices, and improve the +* accuracy of the computed eigenvalues and/or eigenvectors in the +* generalized eigenvalue problem A*x = lambda*B*x. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the operations to be performed on A and B: +* = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 +* and RSCALE(I) = 1.0 for i = 1,...,N. +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the input matrix A. +* On exit, A is overwritten by the balanced matrix. +* If JOB = 'N', A is not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB,N) +* On entry, the input matrix B. +* On exit, B is overwritten by the balanced matrix. +* If JOB = 'N', B is not referenced. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ILO (output) INTEGER +* IHI (output) INTEGER +* ILO and IHI are set to integers such that on exit +* A(i,j) = 0 and B(i,j) = 0 if i > j and +* j = 1,...,ILO-1 or i = IHI+1,...,N. +* If JOB = 'N' or 'S', ILO = 1 and IHI = N. +* +* LSCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied +* to the left side of A and B. If P(j) is the index of the +* row interchanged with row j, and D(j) +* is the scaling factor applied to row j, then +* LSCALE(j) = P(j) for J = 1,...,ILO-1 +* = D(j) for J = ILO,...,IHI +* = P(j) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* RSCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied +* to the right side of A and B. 
If P(j) is the index of the +* column interchanged with column j, and D(j) +* is the scaling factor applied to column j, then +* LSCALE(j) = P(j) for J = 1,...,ILO-1 +* = D(j) for J = ILO,...,IHI +* = P(j) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* WORK (workspace) REAL array, dimension (lwork) +* lwork must be at least max(1,6*N) when JOB = 'S' or 'B', and +* at least 1 when JOB = 'N' or 'P'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* See R.C. WARD, Balancing the generalized eigenvalue problem, +* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) + REAL THREE, SCLFAC + PARAMETER ( THREE = 3.0E+0, SCLFAC = 1.0E+1 ) +* .. +* .. Local Scalars .. + INTEGER I, ICAB, IFLOW, IP1, IR, IRAB, IT, J, JC, JP1, + $ K, KOUNT, L, LCAB, LM1, LRAB, LSFMAX, LSFMIN, + $ M, NR, NRP2 + REAL ALPHA, BASL, BETA, CAB, CMAX, COEF, COEF2, + $ COEF5, COR, EW, EWC, GAMMA, PGAMMA, RAB, SFMAX, + $ SFMIN, SUM, T, TA, TB, TC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG10, MAX, MIN, REAL, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. 
.NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGBAL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + ILO = 1 + IHI = N + RETURN + END IF +* + IF( N.EQ.1 ) THEN + ILO = 1 + IHI = N + LSCALE( 1 ) = ONE + RSCALE( 1 ) = ONE + RETURN + END IF +* + IF( LSAME( JOB, 'N' ) ) THEN + ILO = 1 + IHI = N + DO 10 I = 1, N + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 10 CONTINUE + RETURN + END IF +* + K = 1 + L = N + IF( LSAME( JOB, 'S' ) ) + $ GO TO 190 +* + GO TO 30 +* +* Permute the matrices A and B to isolate the eigenvalues. +* +* Find row with one nonzero in columns 1 through L +* + 20 CONTINUE + L = LM1 + IF( L.NE.1 ) + $ GO TO 30 +* + RSCALE( 1 ) = ONE + LSCALE( 1 ) = ONE + GO TO 190 +* + 30 CONTINUE + LM1 = L - 1 + DO 80 I = L, 1, -1 + DO 40 J = 1, LM1 + JP1 = J + 1 + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 50 + 40 CONTINUE + J = L + GO TO 70 +* + 50 CONTINUE + DO 60 J = JP1, L + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 80 + 60 CONTINUE + J = JP1 - 1 +* + 70 CONTINUE + M = L + IFLOW = 1 + GO TO 160 + 80 CONTINUE + GO TO 100 +* +* Find column with one nonzero in rows K through N +* + 90 CONTINUE + K = K + 1 +* + 100 CONTINUE + DO 150 J = K, L + DO 110 I = K, LM1 + IP1 = I + 1 + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 120 + 110 CONTINUE + I = L + GO TO 140 + 120 CONTINUE + DO 130 I = IP1, L + IF( A( I, J ).NE.ZERO .OR. 
B( I, J ).NE.ZERO ) + $ GO TO 150 + 130 CONTINUE + I = IP1 - 1 + 140 CONTINUE + M = K + IFLOW = 2 + GO TO 160 + 150 CONTINUE + GO TO 190 +* +* Permute rows M and I +* + 160 CONTINUE + LSCALE( M ) = I + IF( I.EQ.M ) + $ GO TO 170 + CALL SSWAP( N-K+1, A( I, K ), LDA, A( M, K ), LDA ) + CALL SSWAP( N-K+1, B( I, K ), LDB, B( M, K ), LDB ) +* +* Permute columns M and J +* + 170 CONTINUE + RSCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 180 + CALL SSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL SSWAP( L, B( 1, J ), 1, B( 1, M ), 1 ) +* + 180 CONTINUE + GO TO ( 20, 90 )IFLOW +* + 190 CONTINUE + ILO = K + IHI = L +* + IF( LSAME( JOB, 'P' ) ) THEN + DO 195 I = ILO, IHI + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 195 CONTINUE + RETURN + END IF +* + IF( ILO.EQ.IHI ) + $ RETURN +* +* Balance the submatrix in rows ILO to IHI. +* + NR = IHI - ILO + 1 + DO 200 I = ILO, IHI + RSCALE( I ) = ZERO + LSCALE( I ) = ZERO +* + WORK( I ) = ZERO + WORK( I+N ) = ZERO + WORK( I+2*N ) = ZERO + WORK( I+3*N ) = ZERO + WORK( I+4*N ) = ZERO + WORK( I+5*N ) = ZERO + 200 CONTINUE +* +* Compute right side vector in resulting linear equations +* + BASL = LOG10( SCLFAC ) + DO 240 I = ILO, IHI + DO 230 J = ILO, IHI + TB = B( I, J ) + TA = A( I, J ) + IF( TA.EQ.ZERO ) + $ GO TO 210 + TA = LOG10( ABS( TA ) ) / BASL + 210 CONTINUE + IF( TB.EQ.ZERO ) + $ GO TO 220 + TB = LOG10( ABS( TB ) ) / BASL + 220 CONTINUE + WORK( I+4*N ) = WORK( I+4*N ) - TA - TB + WORK( J+5*N ) = WORK( J+5*N ) - TA - TB + 230 CONTINUE + 240 CONTINUE +* + COEF = ONE / REAL( 2*NR ) + COEF2 = COEF*COEF + COEF5 = HALF*COEF2 + NRP2 = NR + 2 + BETA = ZERO + IT = 1 +* +* Start generalized conjugate gradient iteration +* + 250 CONTINUE +* + GAMMA = SDOT( NR, WORK( ILO+4*N ), 1, WORK( ILO+4*N ), 1 ) + + $ SDOT( NR, WORK( ILO+5*N ), 1, WORK( ILO+5*N ), 1 ) +* + EW = ZERO + EWC = ZERO + DO 260 I = ILO, IHI + EW = EW + WORK( I+4*N ) + EWC = EWC + WORK( I+5*N ) + 260 CONTINUE +* + GAMMA = COEF*GAMMA - COEF2*( EW**2+EWC**2 ) - COEF5*( EW-EWC )**2 + IF( 
GAMMA.EQ.ZERO ) + $ GO TO 350 + IF( IT.NE.1 ) + $ BETA = GAMMA / PGAMMA + T = COEF5*( EWC-THREE*EW ) + TC = COEF5*( EW-THREE*EWC ) +* + CALL SSCAL( NR, BETA, WORK( ILO ), 1 ) + CALL SSCAL( NR, BETA, WORK( ILO+N ), 1 ) +* + CALL SAXPY( NR, COEF, WORK( ILO+4*N ), 1, WORK( ILO+N ), 1 ) + CALL SAXPY( NR, COEF, WORK( ILO+5*N ), 1, WORK( ILO ), 1 ) +* + DO 270 I = ILO, IHI + WORK( I ) = WORK( I ) + TC + WORK( I+N ) = WORK( I+N ) + T + 270 CONTINUE +* +* Apply matrix to vector +* + DO 300 I = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 290 J = ILO, IHI + IF( A( I, J ).EQ.ZERO ) + $ GO TO 280 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 280 CONTINUE + IF( B( I, J ).EQ.ZERO ) + $ GO TO 290 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 290 CONTINUE + WORK( I+2*N ) = REAL( KOUNT )*WORK( I+N ) + SUM + 300 CONTINUE +* + DO 330 J = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 320 I = ILO, IHI + IF( A( I, J ).EQ.ZERO ) + $ GO TO 310 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 310 CONTINUE + IF( B( I, J ).EQ.ZERO ) + $ GO TO 320 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 320 CONTINUE + WORK( J+3*N ) = REAL( KOUNT )*WORK( J ) + SUM + 330 CONTINUE +* + SUM = SDOT( NR, WORK( ILO+N ), 1, WORK( ILO+2*N ), 1 ) + + $ SDOT( NR, WORK( ILO ), 1, WORK( ILO+3*N ), 1 ) + ALPHA = GAMMA / SUM +* +* Determine correction to current iteration +* + CMAX = ZERO + DO 340 I = ILO, IHI + COR = ALPHA*WORK( I+N ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + LSCALE( I ) = LSCALE( I ) + COR + COR = ALPHA*WORK( I ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + RSCALE( I ) = RSCALE( I ) + COR + 340 CONTINUE + IF( CMAX.LT.HALF ) + $ GO TO 350 +* + CALL SAXPY( NR, -ALPHA, WORK( ILO+2*N ), 1, WORK( ILO+4*N ), 1 ) + CALL SAXPY( NR, -ALPHA, WORK( ILO+3*N ), 1, WORK( ILO+5*N ), 1 ) +* + PGAMMA = GAMMA + IT = IT + 1 + IF( IT.LE.NRP2 ) + $ GO TO 250 +* +* End generalized conjugate gradient iteration +* + 350 CONTINUE + SFMIN = SLAMCH( 'S' ) + SFMAX = ONE / SFMIN + LSFMIN = INT( LOG10( SFMIN ) / BASL+ONE ) + 
LSFMAX = INT( LOG10( SFMAX ) / BASL ) + DO 360 I = ILO, IHI + IRAB = ISAMAX( N-ILO+1, A( I, ILO ), LDA ) + RAB = ABS( A( I, IRAB+ILO-1 ) ) + IRAB = ISAMAX( N-ILO+1, B( I, ILO ), LDB ) + RAB = MAX( RAB, ABS( B( I, IRAB+ILO-1 ) ) ) + LRAB = INT( LOG10( RAB+SFMIN ) / BASL+ONE ) + IR = LSCALE( I ) + SIGN( HALF, LSCALE( I ) ) + IR = MIN( MAX( IR, LSFMIN ), LSFMAX, LSFMAX-LRAB ) + LSCALE( I ) = SCLFAC**IR + ICAB = ISAMAX( IHI, A( 1, I ), 1 ) + CAB = ABS( A( ICAB, I ) ) + ICAB = ISAMAX( IHI, B( 1, I ), 1 ) + CAB = MAX( CAB, ABS( B( ICAB, I ) ) ) + LCAB = INT( LOG10( CAB+SFMIN ) / BASL+ONE ) + JC = RSCALE( I ) + SIGN( HALF, RSCALE( I ) ) + JC = MIN( MAX( JC, LSFMIN ), LSFMAX, LSFMAX-LCAB ) + RSCALE( I ) = SCLFAC**JC + 360 CONTINUE +* +* Row scaling of matrices A and B +* + DO 370 I = ILO, IHI + CALL SSCAL( N-ILO+1, LSCALE( I ), A( I, ILO ), LDA ) + CALL SSCAL( N-ILO+1, LSCALE( I ), B( I, ILO ), LDB ) + 370 CONTINUE +* +* Column scaling of matrices A and B +* + DO 380 J = ILO, IHI + CALL SSCAL( IHI, RSCALE( J ), A( 1, J ), 1 ) + CALL SSCAL( IHI, RSCALE( J ), B( 1, J ), 1 ) + 380 CONTINUE +* + RETURN +* +* End of SGGBAL +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgghrd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgghrd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,264 @@ + SUBROUTINE SGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SGGHRD reduces a pair of real matrices (A,B) to generalized upper +* Hessenberg form using orthogonal transformations, where A is a +* general matrix and B is upper triangular. 
The form of the +* generalized eigenvalue problem is +* A*x = lambda*B*x, +* and B is typically made upper triangular by computing its QR +* factorization and moving the orthogonal matrix Q to the left side +* of the equation. +* +* This subroutine simultaneously reduces A to a Hessenberg matrix H: +* Q**T*A*Z = H +* and transforms B to another upper triangular matrix T: +* Q**T*B*Z = T +* in order to reduce the problem to its standard form +* H*y = lambda*T*y +* where y = Z**T*x. +* +* The orthogonal matrices Q and Z are determined as products of Givens +* rotations. They may either be formed explicitly, or they may be +* postmultiplied into input matrices Q1 and Z1, so that +* +* Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T +* +* Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T +* +* If Q1 is the orthogonal matrix from the QR factorization of B in the +* original equation A*x = lambda*B*x, then SGGHRD reduces the original +* problem to generalized Hessenberg form. +* +* Arguments +* ========= +* +* COMPQ (input) CHARACTER*1 +* = 'N': do not compute Q; +* = 'I': Q is initialized to the unit matrix, and the +* orthogonal matrix Q is returned; +* = 'V': Q must contain an orthogonal matrix Q1 on entry, +* and the product Q1*Q is returned. +* +* COMPZ (input) CHARACTER*1 +* = 'N': do not compute Z; +* = 'I': Z is initialized to the unit matrix, and the +* orthogonal matrix Z is returned; +* = 'V': Z must contain an orthogonal matrix Z1 on entry, +* and the product Z1*Z is returned. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* ILO and IHI mark the rows and columns of A which are to be +* reduced. It is assumed that A is already upper triangular +* in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are +* normally set by a previous call to SGGBAL; otherwise they +* should be set to 1 and N respectively. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. 
+* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the N-by-N general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* rest is set to zero. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB, N) +* On entry, the N-by-N upper triangular matrix B. +* On exit, the upper triangular matrix T = Q**T B Z. The +* elements below the diagonal are set to zero. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* Q (input/output) REAL array, dimension (LDQ, N) +* On entry, if COMPQ = 'V', the orthogonal matrix Q1, +* typically from the QR factorization of B. +* On exit, if COMPQ='I', the orthogonal matrix Q, and if +* COMPQ = 'V', the product Q1*Q. +* Not referenced if COMPQ='N'. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. +* +* Z (input/output) REAL array, dimension (LDZ, N) +* On entry, if COMPZ = 'V', the orthogonal matrix Z1. +* On exit, if COMPZ='I', the orthogonal matrix Z, and if +* COMPZ = 'V', the product Z1*Z. +* Not referenced if COMPZ='N'. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. +* LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* This routine reduces A to Hessenberg and B to triangular form by +* an unblocked reduction, as described in _Matrix_Computations_, +* by Golub and Van Loan (Johns Hopkins Press.) +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILQ, ILZ + INTEGER ICOMPQ, ICOMPZ, JCOL, JROW + REAL C, S, TEMP +* .. 
+* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARTG, SLASET, SROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode COMPQ +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* +* Decode COMPZ +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ICOMPQ.LE.0 ) THEN + INFO = -1 + ELSE IF( ICOMPZ.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( ( ILQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN + INFO = -11 + ELSE IF( ( ILZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGHRD', -INFO ) + RETURN + END IF +* +* Initialize Q and Z if desired. 
+* + IF( ICOMPQ.EQ.3 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Zero out lower triangle of B +* + DO 20 JCOL = 1, N - 1 + DO 10 JROW = JCOL + 1, N + B( JROW, JCOL ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Reduce A and B +* + DO 40 JCOL = ILO, IHI - 2 +* + DO 30 JROW = IHI, JCOL + 2, -1 +* +* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) +* + TEMP = A( JROW-1, JCOL ) + CALL SLARTG( TEMP, A( JROW, JCOL ), C, S, + $ A( JROW-1, JCOL ) ) + A( JROW, JCOL ) = ZERO + CALL SROT( N-JCOL, A( JROW-1, JCOL+1 ), LDA, + $ A( JROW, JCOL+1 ), LDA, C, S ) + CALL SROT( N+2-JROW, B( JROW-1, JROW-1 ), LDB, + $ B( JROW, JROW-1 ), LDB, C, S ) + IF( ILQ ) + $ CALL SROT( N, Q( 1, JROW-1 ), 1, Q( 1, JROW ), 1, C, S ) +* +* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) +* + TEMP = B( JROW, JROW ) + CALL SLARTG( TEMP, B( JROW, JROW-1 ), C, S, + $ B( JROW, JROW ) ) + B( JROW, JROW-1 ) = ZERO + CALL SROT( IHI, A( 1, JROW ), 1, A( 1, JROW-1 ), 1, C, S ) + CALL SROT( JROW-1, B( 1, JROW ), 1, B( 1, JROW-1 ), 1, C, + $ S ) + IF( ILZ ) + $ CALL SROT( N, Z( 1, JROW ), 1, Z( 1, JROW-1 ), 1, C, S ) + 30 CONTINUE + 40 CONTINUE +* + RETURN +* +* End of SGGHRD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgtsv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgtsv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,262 @@ + SUBROUTINE SGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL B( LDB, * ), D( * ), DL( * ), DU( * ) +* .. +* +* Purpose +* ======= +* +* SGTSV solves the equation +* +* A*X = B, +* +* where A is an n by n tridiagonal matrix, by Gaussian elimination with +* partial pivoting. 
+* +* Note that the equation A'*X = B may be solved by interchanging the +* order of the arguments DU and DL. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input/output) REAL array, dimension (N-1) +* On entry, DL must contain the (n-1) sub-diagonal elements of +* A. +* +* On exit, DL is overwritten by the (n-2) elements of the +* second super-diagonal of the upper triangular matrix U from +* the LU factorization of A, in DL(1), ..., DL(n-2). +* +* D (input/output) REAL array, dimension (N) +* On entry, D must contain the diagonal elements of A. +* +* On exit, D is overwritten by the n diagonal elements of U. +* +* DU (input/output) REAL array, dimension (N-1) +* On entry, DU must contain the (n-1) super-diagonal elements +* of A. +* +* On exit, DU is overwritten by the (n-1) elements of the first +* super-diagonal of U. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N by NRHS matrix of right hand side matrix B. +* On exit, if INFO = 0, the N by NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero, and the solution +* has not been computed. The factorization has not been +* completed unless i = N. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL FACT, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. 
+* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGTSV ', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + IF( NRHS.EQ.1 ) THEN + DO 10 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + B( I+1, 1 ) = B( I+1, 1 ) - FACT*B( I, 1 ) + ELSE + INFO = I + RETURN + END IF + DL( I ) = ZERO + ELSE +* +* Interchange rows I and I+1 +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DL( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DL( I ) + DU( I ) = TEMP + TEMP = B( I, 1 ) + B( I, 1 ) = B( I+1, 1 ) + B( I+1, 1 ) = TEMP - FACT*B( I+1, 1 ) + END IF + 10 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + B( I+1, 1 ) = B( I+1, 1 ) - FACT*B( I, 1 ) + ELSE + INFO = I + RETURN + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DU( I ) = TEMP + TEMP = B( I, 1 ) + B( I, 1 ) = B( I+1, 1 ) + B( I+1, 1 ) = TEMP - FACT*B( I+1, 1 ) + END IF + END IF + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF + ELSE + DO 40 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + DO 20 J = 1, NRHS + B( I+1, J ) = B( I+1, J ) - FACT*B( I, J ) + 20 CONTINUE + ELSE + INFO = I + RETURN + END IF + DL( I ) = ZERO + ELSE +* +* Interchange rows I and I+1 +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DL( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DL( I ) + DU( I ) = TEMP + DO 30 J = 1, NRHS + TEMP = B( I, J ) + B( I, J ) = 
B( I+1, J ) + B( I+1, J ) = TEMP - FACT*B( I+1, J ) + 30 CONTINUE + END IF + 40 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + DO 50 J = 1, NRHS + B( I+1, J ) = B( I+1, J ) - FACT*B( I, J ) + 50 CONTINUE + ELSE + INFO = I + RETURN + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DU( I ) = TEMP + DO 60 J = 1, NRHS + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - FACT*B( I+1, J ) + 60 CONTINUE + END IF + END IF + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF + END IF +* +* Back solve with the matrix U from the factorization. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 70 CONTINUE + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / D( N-1 ) + DO 80 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DL( I )* + $ B( I+2, J ) ) / D( I ) + 80 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF + ELSE + DO 100 J = 1, NRHS + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 90 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DL( I )* + $ B( I+2, J ) ) / D( I ) + 90 CONTINUE + 100 CONTINUE + END IF +* + RETURN +* +* End of SGTSV +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgttrf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgttrf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,168 @@ + SUBROUTINE SGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL D( * ), DL( * ), DU( * ), DU2( * ) +* .. 
+* +* Purpose +* ======= +* +* SGTTRF computes an LU factorization of a real tridiagonal matrix A +* using elimination with partial pivoting and row interchanges. +* +* The factorization has the form +* A = L * U +* where L is a product of permutation and unit lower bidiagonal +* matrices and U is upper triangular with nonzeros in only the main +* diagonal and first two superdiagonals. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* DL (input/output) REAL array, dimension (N-1) +* On entry, DL must contain the (n-1) sub-diagonal elements of +* A. +* +* On exit, DL is overwritten by the (n-1) multipliers that +* define the matrix L from the LU factorization of A. +* +* D (input/output) REAL array, dimension (N) +* On entry, D must contain the diagonal elements of A. +* +* On exit, D is overwritten by the n diagonal elements of the +* upper triangular matrix U from the LU factorization of A. +* +* DU (input/output) REAL array, dimension (N-1) +* On entry, DU must contain the (n-1) super-diagonal elements +* of A. +* +* On exit, DU is overwritten by the (n-1) elements of the first +* super-diagonal of U. +* +* DU2 (output) REAL array, dimension (N-2) +* On exit, DU2 is overwritten by the (n-2) elements of the +* second super-diagonal of U. +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, U(k,k) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. 
+ REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL FACT, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SGTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize IPIV(i) = i and DU2(I) = 0 +* + DO 10 I = 1, N + IPIV( I ) = I + 10 CONTINUE + DO 20 I = 1, N - 2 + DU2( I ) = ZERO + 20 CONTINUE +* + DO 30 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required, eliminate DL(I) +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE +* +* Interchange rows I and I+1, eliminate DL(I) +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + DU2( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DU( I+1 ) + IPIV( I ) = I + 1 + END IF + 30 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + IPIV( I ) = I + 1 + END IF + END IF +* +* Check for a zero on the diagonal of U. +* + DO 40 I = 1, N + IF( D( I ).EQ.ZERO ) THEN + INFO = I + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of SGTTRF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgttrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgttrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,140 @@ + SUBROUTINE SGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* SGTTRS solves one of the systems of equations +* A*X = B or A'*X = B, +* with a tridiagonal matrix A using the LU factorization computed +* by SGTTRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A'* X = B (Transpose) +* = 'C': A'* X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) REAL array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) REAL array, dimension (N-1) +* The (n-1) elements of the first super-diagonal of U. +* +* DU2 (input) REAL array, dimension (N-2) +* The (n-2) elements of the second super-diagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the matrix of right hand side vectors B. +* On exit, B is overwritten by the solution vectors X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. 
Local Scalars .. + LOGICAL NOTRAN + INTEGER ITRANS, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOTRAN = ( TRANS.EQ.'N' .OR. TRANS.EQ.'n' ) + IF( .NOT.NOTRAN .AND. .NOT.( TRANS.EQ.'T' .OR. TRANS.EQ. + $ 't' ) .AND. .NOT.( TRANS.EQ.'C' .OR. TRANS.EQ.'c' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Decode TRANS +* + IF( NOTRAN ) THEN + ITRANS = 0 + ELSE + ITRANS = 1 + END IF +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'SGTTRS', TRANS, N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL SGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL SGTTS2( ITRANS, N, JB, DL, D, DU, DU2, IPIV, B( 1, J ), + $ LDB ) + 10 CONTINUE + END IF +* +* End of SGTTRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sgtts2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sgtts2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,196 @@ + SUBROUTINE SGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER ITRANS, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. 
+* +* Purpose +* ======= +* +* SGTTS2 solves one of the systems of equations +* A*X = B or A'*X = B, +* with a tridiagonal matrix A using the LU factorization computed +* by SGTTRF. +* +* Arguments +* ========= +* +* ITRANS (input) INTEGER +* Specifies the form of the system of equations. +* = 0: A * X = B (No transpose) +* = 1: A'* X = B (Transpose) +* = 2: A'* X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) REAL array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) REAL array, dimension (N-1) +* The (n-1) elements of the first super-diagonal of U. +* +* DU2 (input) REAL array, dimension (N-2) +* The (n-2) elements of the second super-diagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the matrix of right hand side vectors B. +* On exit, B is overwritten by the solution vectors X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IP, J + REAL TEMP +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( ITRANS.EQ.0 ) THEN +* +* Solve A*X = B using the LU factorization of A, +* overwriting each right hand side vector with its solution. 
+* + IF( NRHS.LE.1 ) THEN + J = 1 + 10 CONTINUE +* +* Solve L*x = b. +* + DO 20 I = 1, N - 1 + IP = IPIV( I ) + TEMP = B( I+1-IP+I, J ) - DL( I )*B( IP, J ) + B( I, J ) = B( IP, J ) + B( I+1, J ) = TEMP + 20 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 30 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 30 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 10 + END IF + ELSE + DO 60 J = 1, NRHS +* +* Solve L*x = b. +* + DO 40 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 40 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 50 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 50 CONTINUE + 60 CONTINUE + END IF + ELSE +* +* Solve A' * X = B. +* + IF( NRHS.LE.1 ) THEN +* +* Solve U'*x = b. +* + J = 1 + 70 CONTINUE + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 80 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-DU2( I-2 )* + $ B( I-2, J ) ) / D( I ) + 80 CONTINUE +* +* Solve L'*x = b. +* + DO 90 I = N - 1, 1, -1 + IP = IPIV( I ) + TEMP = B( I, J ) - DL( I )*B( I+1, J ) + B( I, J ) = B( IP, J ) + B( IP, J ) = TEMP + 90 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF +* + ELSE + DO 120 J = 1, NRHS +* +* Solve U'*x = b. 
+* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 100 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )- + $ DU2( I-2 )*B( I-2, J ) ) / D( I ) + 100 CONTINUE + DO 110 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* +* End of SGTTS2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/shgeqz.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/shgeqz.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,1243 @@ + SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, + $ ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, WORK, + $ LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ, JOB + INTEGER IHI, ILO, INFO, LDH, LDQ, LDT, LDZ, LWORK, N +* .. +* .. Array Arguments .. + REAL ALPHAI( * ), ALPHAR( * ), BETA( * ), + $ H( LDH, * ), Q( LDQ, * ), T( LDT, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SHGEQZ computes the eigenvalues of a real matrix pair (H,T), +* where H is an upper Hessenberg matrix and T is upper triangular, +* using the double-shift QZ method. +* Matrix pairs of this type are produced by the reduction to +* generalized upper Hessenberg form of a real matrix pair (A,B): +* +* A = Q1*H*Z1**T, B = Q1*T*Z1**T, +* +* as computed by SGGHRD. +* +* If JOB='S', then the Hessenberg-triangular pair (H,T) is +* also reduced to generalized Schur form, +* +* H = Q*S*Z**T, T = Q*P*Z**T, +* +* where Q and Z are orthogonal matrices, P is an upper triangular +* matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 +* diagonal blocks. 
+* +* The 1-by-1 blocks correspond to real eigenvalues of the matrix pair +* (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of +* eigenvalues. +* +* Additionally, the 2-by-2 upper triangular diagonal blocks of P +* corresponding to 2-by-2 blocks of S are reduced to positive diagonal +* form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, +* P(j,j) > 0, and P(j+1,j+1) > 0. +* +* Optionally, the orthogonal matrix Q from the generalized Schur +* factorization may be postmultiplied into an input matrix Q1, and the +* orthogonal matrix Z may be postmultiplied into an input matrix Z1. +* If Q1 and Z1 are the orthogonal matrices from SGGHRD that reduced +* the matrix pair (A,B) to generalized upper Hessenberg form, then the +* output matrices Q1*Q and Z1*Z are the orthogonal factors from the +* generalized Schur factorization of (A,B): +* +* A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. +* +* To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, +* of (A,B)) are computed as a pair of values (alpha,beta), where alpha is +* complex and beta real. +* If beta is nonzero, lambda = alpha / beta is an eigenvalue of the +* generalized nonsymmetric eigenvalue problem (GNEP) +* A*x = lambda*B*x +* and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the +* alternate form of the GNEP +* mu*A*y = B*y. +* Real eigenvalues can be read directly from the generalized Schur +* form: +* alpha = S(i,i), beta = P(i,i). +* +* Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix +* Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), +* pp. 241--256. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* = 'E': Compute eigenvalues only; +* = 'S': Compute eigenvalues and the Schur form. 
+* +* COMPQ (input) CHARACTER*1 +* = 'N': Left Schur vectors (Q) are not computed; +* = 'I': Q is initialized to the unit matrix and the matrix Q +* of left Schur vectors of (H,T) is returned; +* = 'V': Q must contain an orthogonal matrix Q1 on entry and +* the product Q1*Q is returned. +* +* COMPZ (input) CHARACTER*1 +* = 'N': Right Schur vectors (Z) are not computed; +* = 'I': Z is initialized to the unit matrix and the matrix Z +* of right Schur vectors of (H,T) is returned; +* = 'V': Z must contain an orthogonal matrix Z1 on entry and +* the product Z1*Z is returned. +* +* N (input) INTEGER +* The order of the matrices H, T, Q, and Z. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* ILO and IHI mark the rows and columns of H which are in +* Hessenberg form. It is assumed that A is already upper +* triangular in rows and columns 1:ILO-1 and IHI+1:N. +* If N > 0, 1 <= ILO <= IHI <= N; if N = 0, ILO=1 and IHI=0. +* +* H (input/output) REAL array, dimension (LDH, N) +* On entry, the N-by-N upper Hessenberg matrix H. +* On exit, if JOB = 'S', H contains the upper quasi-triangular +* matrix S from the generalized Schur factorization; +* 2-by-2 diagonal blocks (corresponding to complex conjugate +* pairs of eigenvalues) are returned in standard form, with +* H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. +* If JOB = 'E', the diagonal blocks of H match those of S, but +* the rest of H is unspecified. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max( 1, N ). +* +* T (input/output) REAL array, dimension (LDT, N) +* On entry, the N-by-N upper triangular matrix T. +* On exit, if JOB = 'S', T contains the upper triangular +* matrix P from the generalized Schur factorization; +* 2-by-2 diagonal blocks of P corresponding to 2-by-2 blocks of S +* are reduced to positive diagonal form, i.e., if H(j+1,j) is +* non-zero, then T(j+1,j) = T(j,j+1) = 0, T(j,j) > 0, and +* T(j+1,j+1) > 0. 
+* If JOB = 'E', the diagonal blocks of T match those of P, but +* the rest of T is unspecified. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max( 1, N ). +* +* ALPHAR (output) REAL array, dimension (N) +* The real parts of each scalar alpha defining an eigenvalue +* of GNEP. +* +* ALPHAI (output) REAL array, dimension (N) +* The imaginary parts of each scalar alpha defining an +* eigenvalue of GNEP. +* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +* positive, then the j-th and (j+1)-st eigenvalues are a +* complex conjugate pair, with ALPHAI(j+1) = -ALPHAI(j). +* +* BETA (output) REAL array, dimension (N) +* The scalars beta that define the eigenvalues of GNEP. +* Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and +* beta = BETA(j) represent the j-th eigenvalue of the matrix +* pair (A,B), in one of the forms lambda = alpha/beta or +* mu = beta/alpha. Since either lambda or mu may overflow, +* they should not, in general, be computed. +* +* Q (input/output) REAL array, dimension (LDQ, N) +* On entry, if COMPZ = 'V', the orthogonal matrix Q1 used in +* the reduction of (A,B) to generalized Hessenberg form. +* On exit, if COMPZ = 'I', the orthogonal matrix of left Schur +* vectors of (H,T), and if COMPZ = 'V', the orthogonal matrix +* of left Schur vectors of (A,B). +* Not referenced if COMPZ = 'N'. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1. +* If COMPQ='V' or 'I', then LDQ >= N. +* +* Z (input/output) REAL array, dimension (LDZ, N) +* On entry, if COMPZ = 'V', the orthogonal matrix Z1 used in +* the reduction of (A,B) to generalized Hessenberg form. +* On exit, if COMPZ = 'I', the orthogonal matrix of +* right Schur vectors of (H,T), and if COMPZ = 'V', the +* orthogonal matrix of right Schur vectors of (A,B). +* Not referenced if COMPZ = 'N'. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If COMPZ='V' or 'I', then LDZ >= N. 
+* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO >= 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1,...,N: the QZ iteration did not converge. (H,T) is not +* in Schur form, but ALPHAR(i), ALPHAI(i), and +* BETA(i), i=INFO+1,...,N should be correct. +* = N+1,...,2*N: the shift calculation failed. (H,T) is not +* in Schur form, but ALPHAR(i), ALPHAI(i), and +* BETA(i), i=INFO-N+1,...,N should be correct. +* +* Further Details +* =============== +* +* Iteration counters: +* +* JITER -- counts iterations. +* IITER -- counts iterations run since ILAST was last +* changed. This is therefore reset only when a 1-by-1 or +* 2-by-2 block deflates off the bottom. +* +* ===================================================================== +* +* .. Parameters .. +* $ SAFETY = 1.0E+0 ) + REAL HALF, ZERO, ONE, SAFETY + PARAMETER ( HALF = 0.5E+0, ZERO = 0.0E+0, ONE = 1.0E+0, + $ SAFETY = 1.0E+2 ) +* .. +* .. Local Scalars .. 
+ LOGICAL ILAZR2, ILAZRO, ILPIVT, ILQ, ILSCHR, ILZ, + $ LQUERY + INTEGER ICOMPQ, ICOMPZ, IFIRST, IFRSTM, IITER, ILAST, + $ ILASTM, IN, ISCHUR, ISTART, J, JC, JCH, JITER, + $ JR, MAXIT + REAL A11, A12, A1I, A1R, A21, A22, A2I, A2R, AD11, + $ AD11L, AD12, AD12L, AD21, AD21L, AD22, AD22L, + $ AD32L, AN, ANORM, ASCALE, ATOL, B11, B1A, B1I, + $ B1R, B22, B2A, B2I, B2R, BN, BNORM, BSCALE, + $ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL, + $ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX, + $ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1, + $ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L, + $ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR, + $ WR2 +* .. +* .. Local Arrays .. + REAL V( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANHS, SLAPY2, SLAPY3 + EXTERNAL LSAME, SLAMCH, SLANHS, SLAPY2, SLAPY3 +* .. +* .. External Subroutines .. + EXTERNAL SLAG2, SLARFG, SLARTG, SLASET, SLASV2, SROT, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Decode JOB, COMPQ, COMPZ +* + IF( LSAME( JOB, 'E' ) ) THEN + ILSCHR = .FALSE. + ISCHUR = 1 + ELSE IF( LSAME( JOB, 'S' ) ) THEN + ILSCHR = .TRUE. + ISCHUR = 2 + ELSE + ISCHUR = 0 + END IF +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. 
+ ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Check Argument Values +* + INFO = 0 + WORK( 1 ) = MAX( 1, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( ISCHUR.EQ.0 ) THEN + INFO = -1 + ELSE IF( ICOMPQ.EQ.0 ) THEN + INFO = -2 + ELSE IF( ICOMPZ.EQ.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 ) THEN + INFO = -5 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -6 + ELSE IF( LDH.LT.N ) THEN + INFO = -8 + ELSE IF( LDT.LT.N ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 .OR. ( ILQ .AND. LDQ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDZ.LT.1 .OR. ( ILZ .AND. LDZ.LT.N ) ) THEN + INFO = -17 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SHGEQZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = REAL( 1 ) + RETURN + END IF +* +* Initialize Q and Z +* + IF( ICOMPQ.EQ.3 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Machine Constants +* + IN = IHI + 1 - ILO + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + ULP = SLAMCH( 'E' )*SLAMCH( 'B' ) + ANORM = SLANHS( 'F', IN, H( ILO, ILO ), LDH, WORK ) + BNORM = SLANHS( 'F', IN, T( ILO, ILO ), LDT, WORK ) + ATOL = MAX( SAFMIN, ULP*ANORM ) + BTOL = MAX( SAFMIN, ULP*BNORM ) + ASCALE = ONE / MAX( SAFMIN, ANORM ) + BSCALE = ONE / MAX( SAFMIN, BNORM ) +* +* Set Eigenvalues IHI+1:N +* + DO 30 J = IHI + 1, N + IF( T( J, J ).LT.ZERO ) THEN + IF( ILSCHR ) THEN + DO 10 JR = 1, J + H( JR, J ) = -H( JR, J ) + T( JR, J ) = -T( JR, J ) + 10 CONTINUE + ELSE + H( J, J ) = -H( J, J ) + T( J, J ) = -T( J, J ) + END IF + IF( ILZ ) THEN + DO 20 JR = 1, N + Z( JR, J ) = -Z( JR, J ) + 20 CONTINUE + END IF + END IF + ALPHAR( J ) = H( J, J ) + ALPHAI( J ) = ZERO + BETA( J ) = T( J, J ) + 30 CONTINUE +* +* If IHI < ILO, skip QZ steps +* + IF( IHI.LT.ILO ) + $ GO TO 380 +* +* MAIN QZ ITERATION LOOP +* +* Initialize 
dynamic indices +* +* Eigenvalues ILAST+1:N have been found. +* Column operations modify rows IFRSTM:whatever. +* Row operations modify columns whatever:ILASTM. +* +* If only eigenvalues are being computed, then +* IFRSTM is the row of the last splitting row above row ILAST; +* this is always at least ILO. +* IITER counts iterations since the last eigenvalue was found, +* to tell when to use an extraordinary shift. +* MAXIT is the maximum number of QZ sweeps allowed. +* + ILAST = IHI + IF( ILSCHR ) THEN + IFRSTM = 1 + ILASTM = N + ELSE + IFRSTM = ILO + ILASTM = IHI + END IF + IITER = 0 + ESHIFT = ZERO + MAXIT = 30*( IHI-ILO+1 ) +* + DO 360 JITER = 1, MAXIT +* +* Split the matrix if possible. +* +* Two tests: +* 1: H(j,j-1)=0 or j=ILO +* 2: T(j,j)=0 +* + IF( ILAST.EQ.ILO ) THEN +* +* Special case: j=ILAST +* + GO TO 80 + ELSE + IF( ABS( H( ILAST, ILAST-1 ) ).LE.ATOL ) THEN + H( ILAST, ILAST-1 ) = ZERO + GO TO 80 + END IF + END IF +* + IF( ABS( T( ILAST, ILAST ) ).LE.BTOL ) THEN + T( ILAST, ILAST ) = ZERO + GO TO 70 + END IF +* +* General case: j unfl ) +* __ +* (sA - wB) ( CZ -SZ ) +* ( SZ CZ ) +* + C11R = S1*A11 - WR*B11 + C11I = -WI*B11 + C12 = S1*A12 + C21 = S1*A21 + C22R = S1*A22 - WR*B22 + C22I = -WI*B22 +* + IF( ABS( C11R )+ABS( C11I )+ABS( C12 ).GT.ABS( C21 )+ + $ ABS( C22R )+ABS( C22I ) ) THEN + T1 = SLAPY3( C12, C11R, C11I ) + CZ = C12 / T1 + SZR = -C11R / T1 + SZI = -C11I / T1 + ELSE + CZ = SLAPY2( C22R, C22I ) + IF( CZ.LE.SAFMIN ) THEN + CZ = ZERO + SZR = ONE + SZI = ZERO + ELSE + TEMPR = C22R / CZ + TEMPI = C22I / CZ + T1 = SLAPY2( CZ, C21 ) + CZ = CZ / T1 + SZR = -C21*TEMPR / T1 + SZI = C21*TEMPI / T1 + END IF + END IF +* +* Compute Givens rotation on left +* +* ( CQ SQ ) +* ( __ ) A or B +* ( -SQ CQ ) +* + AN = ABS( A11 ) + ABS( A12 ) + ABS( A21 ) + ABS( A22 ) + BN = ABS( B11 ) + ABS( B22 ) + WABS = ABS( WR ) + ABS( WI ) + IF( S1*AN.GT.WABS*BN ) THEN + CQ = CZ*B11 + SQR = SZR*B22 + SQI = -SZI*B22 + ELSE + A1R = CZ*A11 + SZR*A12 + A1I = SZI*A12 + A2R = 
CZ*A21 + SZR*A22 + A2I = SZI*A22 + CQ = SLAPY2( A1R, A1I ) + IF( CQ.LE.SAFMIN ) THEN + CQ = ZERO + SQR = ONE + SQI = ZERO + ELSE + TEMPR = A1R / CQ + TEMPI = A1I / CQ + SQR = TEMPR*A2R + TEMPI*A2I + SQI = TEMPI*A2R - TEMPR*A2I + END IF + END IF + T1 = SLAPY3( CQ, SQR, SQI ) + CQ = CQ / T1 + SQR = SQR / T1 + SQI = SQI / T1 +* +* Compute diagonal elements of QBZ +* + TEMPR = SQR*SZR - SQI*SZI + TEMPI = SQR*SZI + SQI*SZR + B1R = CQ*CZ*B11 + TEMPR*B22 + B1I = TEMPI*B22 + B1A = SLAPY2( B1R, B1I ) + B2R = CQ*CZ*B22 + TEMPR*B11 + B2I = -TEMPI*B11 + B2A = SLAPY2( B2R, B2I ) +* +* Normalize so beta > 0, and Im( alpha1 ) > 0 +* + BETA( ILAST-1 ) = B1A + BETA( ILAST ) = B2A + ALPHAR( ILAST-1 ) = ( WR*B1A )*S1INV + ALPHAI( ILAST-1 ) = ( WI*B1A )*S1INV + ALPHAR( ILAST ) = ( WR*B2A )*S1INV + ALPHAI( ILAST ) = -( WI*B2A )*S1INV +* +* Step 3: Go to next block -- exit if finished. +* + ILAST = IFIRST - 1 + IF( ILAST.LT.ILO ) + $ GO TO 380 +* +* Reset counters +* + IITER = 0 + ESHIFT = ZERO + IF( .NOT.ILSCHR ) THEN + ILASTM = ILAST + IF( IFRSTM.GT.ILAST ) + $ IFRSTM = ILO + END IF + GO TO 350 + ELSE +* +* Usual case: 3x3 or larger block, using Francis implicit +* double-shift +* +* 2 +* Eigenvalue equation is w - c w + d = 0, +* +* -1 2 -1 +* so compute 1st column of (A B ) - c A B + d +* using the formula in QZIT (from EISPACK) +* +* We assume that the block is at least 3x3 +* + AD11 = ( ASCALE*H( ILAST-1, ILAST-1 ) ) / + $ ( BSCALE*T( ILAST-1, ILAST-1 ) ) + AD21 = ( ASCALE*H( ILAST, ILAST-1 ) ) / + $ ( BSCALE*T( ILAST-1, ILAST-1 ) ) + AD12 = ( ASCALE*H( ILAST-1, ILAST ) ) / + $ ( BSCALE*T( ILAST, ILAST ) ) + AD22 = ( ASCALE*H( ILAST, ILAST ) ) / + $ ( BSCALE*T( ILAST, ILAST ) ) + U12 = T( ILAST-1, ILAST ) / T( ILAST, ILAST ) + AD11L = ( ASCALE*H( IFIRST, IFIRST ) ) / + $ ( BSCALE*T( IFIRST, IFIRST ) ) + AD21L = ( ASCALE*H( IFIRST+1, IFIRST ) ) / + $ ( BSCALE*T( IFIRST, IFIRST ) ) + AD12L = ( ASCALE*H( IFIRST, IFIRST+1 ) ) / + $ ( BSCALE*T( IFIRST+1, IFIRST+1 ) ) + AD22L = ( 
ASCALE*H( IFIRST+1, IFIRST+1 ) ) / + $ ( BSCALE*T( IFIRST+1, IFIRST+1 ) ) + AD32L = ( ASCALE*H( IFIRST+2, IFIRST+1 ) ) / + $ ( BSCALE*T( IFIRST+1, IFIRST+1 ) ) + U12L = T( IFIRST, IFIRST+1 ) / T( IFIRST+1, IFIRST+1 ) +* + V( 1 ) = ( AD11-AD11L )*( AD22-AD11L ) - AD12*AD21 + + $ AD21*U12*AD11L + ( AD12L-AD11L*U12L )*AD21L + V( 2 ) = ( ( AD22L-AD11L )-AD21L*U12L-( AD11-AD11L )- + $ ( AD22-AD11L )+AD21*U12 )*AD21L + V( 3 ) = AD32L*AD21L +* + ISTART = IFIRST +* + CALL SLARFG( 3, V( 1 ), V( 2 ), 1, TAU ) + V( 1 ) = ONE +* +* Sweep +* + DO 290 J = ISTART, ILAST - 2 +* +* All but last elements: use 3x3 Householder transforms. +* +* Zero (j-1)st column of A +* + IF( J.GT.ISTART ) THEN + V( 1 ) = H( J, J-1 ) + V( 2 ) = H( J+1, J-1 ) + V( 3 ) = H( J+2, J-1 ) +* + CALL SLARFG( 3, H( J, J-1 ), V( 2 ), 1, TAU ) + V( 1 ) = ONE + H( J+1, J-1 ) = ZERO + H( J+2, J-1 ) = ZERO + END IF +* + DO 230 JC = J, ILASTM + TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )* + $ H( J+2, JC ) ) + H( J, JC ) = H( J, JC ) - TEMP + H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 ) + H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 ) + TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )* + $ T( J+2, JC ) ) + T( J, JC ) = T( J, JC ) - TEMP2 + T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 ) + T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 ) + 230 CONTINUE + IF( ILQ ) THEN + DO 240 JR = 1, N + TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* + $ Q( JR, J+2 ) ) + Q( JR, J ) = Q( JR, J ) - TEMP + Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 ) + Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 ) + 240 CONTINUE + END IF +* +* Zero j-th column of B (see SLAGBC for details) +* +* Swap rows to pivot +* + ILPIVT = .FALSE. 
+ TEMP = MAX( ABS( T( J+1, J+1 ) ), ABS( T( J+1, J+2 ) ) ) + TEMP2 = MAX( ABS( T( J+2, J+1 ) ), ABS( T( J+2, J+2 ) ) ) + IF( MAX( TEMP, TEMP2 ).LT.SAFMIN ) THEN + SCALE = ZERO + U1 = ONE + U2 = ZERO + GO TO 250 + ELSE IF( TEMP.GE.TEMP2 ) THEN + W11 = T( J+1, J+1 ) + W21 = T( J+2, J+1 ) + W12 = T( J+1, J+2 ) + W22 = T( J+2, J+2 ) + U1 = T( J+1, J ) + U2 = T( J+2, J ) + ELSE + W21 = T( J+1, J+1 ) + W11 = T( J+2, J+1 ) + W22 = T( J+1, J+2 ) + W12 = T( J+2, J+2 ) + U2 = T( J+1, J ) + U1 = T( J+2, J ) + END IF +* +* Swap columns if nec. +* + IF( ABS( W12 ).GT.ABS( W11 ) ) THEN + ILPIVT = .TRUE. + TEMP = W12 + TEMP2 = W22 + W12 = W11 + W22 = W21 + W11 = TEMP + W21 = TEMP2 + END IF +* +* LU-factor +* + TEMP = W21 / W11 + U2 = U2 - TEMP*U1 + W22 = W22 - TEMP*W12 + W21 = ZERO +* +* Compute SCALE +* + SCALE = ONE + IF( ABS( W22 ).LT.SAFMIN ) THEN + SCALE = ZERO + U2 = ONE + U1 = -W12 / W11 + GO TO 250 + END IF + IF( ABS( W22 ).LT.ABS( U2 ) ) + $ SCALE = ABS( W22 / U2 ) + IF( ABS( W11 ).LT.ABS( U1 ) ) + $ SCALE = MIN( SCALE, ABS( W11 / U1 ) ) +* +* Solve +* + U2 = ( SCALE*U2 ) / W22 + U1 = ( SCALE*U1-W12*U2 ) / W11 +* + 250 CONTINUE + IF( ILPIVT ) THEN + TEMP = U2 + U2 = U1 + U1 = TEMP + END IF +* +* Compute Householder Vector +* + T1 = SQRT( SCALE**2+U1**2+U2**2 ) + TAU = ONE + SCALE / T1 + VS = -ONE / ( SCALE+T1 ) + V( 1 ) = ONE + V( 2 ) = VS*U1 + V( 3 ) = VS*U2 +* +* Apply transformations from the right. 
+* + DO 260 JR = IFRSTM, MIN( J+3, ILAST ) + TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )* + $ H( JR, J+2 ) ) + H( JR, J ) = H( JR, J ) - TEMP + H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 ) + H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 ) + 260 CONTINUE + DO 270 JR = IFRSTM, J + 2 + TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )* + $ T( JR, J+2 ) ) + T( JR, J ) = T( JR, J ) - TEMP + T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 ) + T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 ) + 270 CONTINUE + IF( ILZ ) THEN + DO 280 JR = 1, N + TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* + $ Z( JR, J+2 ) ) + Z( JR, J ) = Z( JR, J ) - TEMP + Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 ) + Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 ) + 280 CONTINUE + END IF + T( J+1, J ) = ZERO + T( J+2, J ) = ZERO + 290 CONTINUE +* +* Last elements: Use Givens rotations +* +* Rotations from the left +* + J = ILAST - 1 + TEMP = H( J, J-1 ) + CALL SLARTG( TEMP, H( J+1, J-1 ), C, S, H( J, J-1 ) ) + H( J+1, J-1 ) = ZERO +* + DO 300 JC = J, ILASTM + TEMP = C*H( J, JC ) + S*H( J+1, JC ) + H( J+1, JC ) = -S*H( J, JC ) + C*H( J+1, JC ) + H( J, JC ) = TEMP + TEMP2 = C*T( J, JC ) + S*T( J+1, JC ) + T( J+1, JC ) = -S*T( J, JC ) + C*T( J+1, JC ) + T( J, JC ) = TEMP2 + 300 CONTINUE + IF( ILQ ) THEN + DO 310 JR = 1, N + TEMP = C*Q( JR, J ) + S*Q( JR, J+1 ) + Q( JR, J+1 ) = -S*Q( JR, J ) + C*Q( JR, J+1 ) + Q( JR, J ) = TEMP + 310 CONTINUE + END IF +* +* Rotations from the right. 
+* + TEMP = T( J+1, J+1 ) + CALL SLARTG( TEMP, T( J+1, J ), C, S, T( J+1, J+1 ) ) + T( J+1, J ) = ZERO +* + DO 320 JR = IFRSTM, ILAST + TEMP = C*H( JR, J+1 ) + S*H( JR, J ) + H( JR, J ) = -S*H( JR, J+1 ) + C*H( JR, J ) + H( JR, J+1 ) = TEMP + 320 CONTINUE + DO 330 JR = IFRSTM, ILAST - 1 + TEMP = C*T( JR, J+1 ) + S*T( JR, J ) + T( JR, J ) = -S*T( JR, J+1 ) + C*T( JR, J ) + T( JR, J+1 ) = TEMP + 330 CONTINUE + IF( ILZ ) THEN + DO 340 JR = 1, N + TEMP = C*Z( JR, J+1 ) + S*Z( JR, J ) + Z( JR, J ) = -S*Z( JR, J+1 ) + C*Z( JR, J ) + Z( JR, J+1 ) = TEMP + 340 CONTINUE + END IF +* +* End of Double-Shift code +* + END IF +* + GO TO 350 +* +* End of iteration loop +* + 350 CONTINUE + 360 CONTINUE +* +* Drop-through = non-convergence +* + INFO = ILAST + GO TO 420 +* +* Successful completion of all QZ steps +* + 380 CONTINUE +* +* Set Eigenvalues 1:ILO-1 +* + DO 410 J = 1, ILO - 1 + IF( T( J, J ).LT.ZERO ) THEN + IF( ILSCHR ) THEN + DO 390 JR = 1, J + H( JR, J ) = -H( JR, J ) + T( JR, J ) = -T( JR, J ) + 390 CONTINUE + ELSE + H( J, J ) = -H( J, J ) + T( J, J ) = -T( J, J ) + END IF + IF( ILZ ) THEN + DO 400 JR = 1, N + Z( JR, J ) = -Z( JR, J ) + 400 CONTINUE + END IF + END IF + ALPHAR( J ) = H( J, J ) + ALPHAI( J ) = ZERO + BETA( J ) = T( J, J ) + 410 CONTINUE +* +* Normal Termination +* + INFO = 0 +* +* Exit (other than argument error) -- return optimal workspace size +* + 420 CONTINUE + WORK( 1 ) = REAL( N ) + RETURN +* +* End of SHGEQZ +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/shseqr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/shseqr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,407 @@ + SUBROUTINE SHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z, + $ LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N + CHARACTER COMPZ, JOB +* .. +* .. Array Arguments .. 
+ REAL H( LDH, * ), WI( * ), WORK( * ), WR( * ), + $ Z( LDZ, * ) +* .. +* Purpose +* ======= +* +* SHSEQR computes the eigenvalues of a Hessenberg matrix H +* and, optionally, the matrices T and Z from the Schur decomposition +* H = Z T Z**T, where T is an upper quasi-triangular matrix (the +* Schur form), and Z is the orthogonal matrix of Schur vectors. +* +* Optionally Z may be postmultiplied into an input orthogonal +* matrix Q so that this routine can give the Schur factorization +* of a matrix A which has been reduced to the Hessenberg form H +* by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* = 'E': compute eigenvalues only; +* = 'S': compute eigenvalues and the Schur form T. +* +* COMPZ (input) CHARACTER*1 +* = 'N': no Schur vectors are computed; +* = 'I': Z is initialized to the unit matrix and the matrix Z +* of Schur vectors of H is returned; +* = 'V': Z must contain an orthogonal matrix Q on entry, and +* the product Q*Z is returned. +* +* N (input) INTEGER +* The order of the matrix H. N .GE. 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to SGEBAL, and then passed to SGEHRD +* when the matrix output by SGEBAL is reduced to Hessenberg +* form. Otherwise ILO and IHI should be set to 1 and N +* respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +* If N = 0, then ILO = 1 and IHI = 0. +* +* H (input/output) REAL array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if INFO = 0 and JOB = 'S', then H contains the +* upper quasi-triangular matrix T from the Schur decomposition +* (the Schur form); 2-by-2 diagonal blocks (corresponding to +* complex conjugate pairs of eigenvalues) are returned in +* standard form, with H(i,i) = H(i+1,i+1) and +* H(i+1,i)*H(i,i+1).LT.0. 
If INFO = 0 and JOB = 'E', the +* contents of H are unspecified on exit. (The output value of +* H when INFO.GT.0 is given under the description of INFO +* below.) +* +* Unlike earlier versions of SHSEQR, this subroutine may +* explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 +* or j = IHI+1, IHI+2, ... N. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH .GE. max(1,N). +* +* WR (output) REAL array, dimension (N) +* WI (output) REAL array, dimension (N) +* The real and imaginary parts, respectively, of the computed +* eigenvalues. If two eigenvalues are computed as a complex +* conjugate pair, they are stored in consecutive elements of +* WR and WI, say the i-th and (i+1)th, with WI(i) .GT. 0 and +* WI(i+1) .LT. 0. If JOB = 'S', the eigenvalues are stored in +* the same order as on the diagonal of the Schur form returned +* in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 +* diagonal block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and +* WI(i+1) = -WI(i). +* +* Z (input/output) REAL array, dimension (LDZ,N) +* If COMPZ = 'N', Z is not referenced. +* If COMPZ = 'I', on entry Z need not be set and on exit, +* if INFO = 0, Z contains the orthogonal matrix Z of the Schur +* vectors of H. If COMPZ = 'V', on entry Z must contain an +* N-by-N matrix Q, which is assumed to be equal to the unit +* matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, +* if INFO = 0, Z contains Q*Z. +* Normally Q is the orthogonal matrix generated by SORGHR +* after the call to SGEHRD which formed the Hessenberg matrix +* H. (The output value of Z when INFO.GT.0 is given under +* the description of INFO below.) +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. if COMPZ = 'I' or +* COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns an estimate of +* the optimal value for LWORK. 
+* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK .GE. max(1,N) +* is sufficient, but LWORK typically as large as 6*N may +* be required for optimal performance. A workspace query +* to determine the optimal workspace size is recommended. +* +* If LWORK = -1, then SHSEQR does a workspace query. +* In this case, SHSEQR checks the input parameters and +* estimates the optimal workspace size for the given +* values of N, ILO and IHI. The estimate is returned +* in WORK(1). No error message related to LWORK is +* issued by XERBLA. Neither H nor Z are accessed. +* +* +* INFO (output) INTEGER +* = 0: successful exit +* .LT. 0: if INFO = -i, the i-th argument had an illegal +* value +* .GT. 0: if INFO = i, SHSEQR failed to compute all of +* the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +* and WI contain those eigenvalues which have been +* successfully computed. (Failures are rare.) +* +* If INFO .GT. 0 and JOB = 'E', then on exit, the +* remaining unconverged eigenvalues are the eigen- +* values of the upper Hessenberg matrix rows and +* columns ILO through INFO of the final, output +* value of H. +* +* If INFO .GT. 0 and JOB = 'S', then on exit +* +* (*) (initial value of H)*U = U*(final value of H) +* +* where U is an orthogonal matrix. The final +* value of H is upper Hessenberg and quasi-triangular +* in rows and columns INFO+1 through IHI. +* +* If INFO .GT. 0 and COMPZ = 'V', then on exit +* +* (final value of Z) = (initial value of Z)*U +* +* where U is the orthogonal matrix in (*) (regard- +* less of the value of JOB.) +* +* If INFO .GT. 0 and COMPZ = 'I', then on exit +* (final value of Z) = U +* where U is the orthogonal matrix in (*) (regard- +* less of the value of JOB.) +* +* If INFO .GT. 0 and COMPZ = 'N', then Z is not +* accessed. +* +* ================================================================ +* Default values supplied by +* ILAENV(ISPEC,'SHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). 
+* It is suggested that these defaults be adjusted in order +* to attain best performance in each particular +* computational environment. +* +* ISPEC=1: The SLAHQR vs SLAQR0 crossover point. +* Default: 75. (Must be at least 11.) +* +* ISPEC=2: Recommended deflation window size. +* This depends on ILO, IHI and NS. NS is the +* number of simultaneous shifts returned +* by ILAENV(ISPEC=4). (See ISPEC=4 below.) +* The default for (IHI-ILO+1).LE.500 is NS. +* The default for (IHI-ILO+1).GT.500 is 3*NS/2. +* +* ISPEC=3: Nibble crossover point. (See ILAENV for +* details.) Default: 14% of deflation window +* size. +* +* ISPEC=4: Number of simultaneous shifts, NS, in +* a multi-shift QR iteration. +* +* If IHI-ILO+1 is ... +* +* greater than ...but less ... the +* or equal to ... than default is +* +* 1 30 NS - 2(+) +* 30 60 NS - 4(+) +* 60 150 NS = 10(+) +* 150 590 NS = ** +* 590 3000 NS = 64 +* 3000 6000 NS = 128 +* 6000 infinity NS = 256 +* +* (+) By default some or all matrices of this order +* are passed to the implicit double shift routine +* SLAHQR and NS is ignored. See ISPEC=1 above +* and comments in IPARM for details. +* +* The asterisks (**) indicate an ad-hoc +* function of N increasing from 10 to 64. +* +* ISPEC=5: Select structured matrix multiply. +* (See ILAENV for details.) Default: 3. +* +* ================================================================ +* Based on contributions by +* Karen Braman and Ralph Byers, Department of Mathematics, +* University of Kansas, USA +* +* ================================================================ +* References: +* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +* Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +* Performance, SIAM Journal of Matrix Analysis, volume 23, pages +* 929--947, 2002. +* +* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +* Algorithm Part II: Aggressive Early Deflation, SIAM Journal +* of Matrix Analysis, volume 23, pages 948--973, 2002. 
+* +* ================================================================ +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . SLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== +* +* ==== NL allocates some local workspace to help small matrices +* . through a rare SLAHQR failure. NL .GT. NTINY = 11 is +* . required and NL .LE. NMIN = ILAENV(ISPEC=1,...) is recom- +* . mended. (The default value of NMIN is 75.) Using NL = 49 +* . allows up to six simultaneous shifts and a 16-by-16 +* . deflation window. ==== +* + INTEGER NTINY + PARAMETER ( NTINY = 11 ) + INTEGER NL + PARAMETER ( NL = 49 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) +* .. +* .. Local Arrays .. + REAL HL( NL, NL ), WORKL( NL ) +* .. +* .. Local Scalars .. + INTEGER I, KBOT, NMIN + LOGICAL INITZ, LQUERY, WANTT, WANTZ +* .. +* .. External Functions .. + INTEGER ILAENV + LOGICAL LSAME + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLAHQR, SLAQR0, SLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* ==== Decode and check the input parameters. ==== +* + WANTT = LSAME( JOB, 'S' ) + INITZ = LSAME( COMPZ, 'I' ) + WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) + WORK( 1 ) = REAL( MAX( 1, N ) ) + LQUERY = LWORK.EQ.-1 +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'E' ) .AND. .NOT.WANTT ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.NE.0 ) THEN +* +* ==== Quick return in case of invalid argument. 
==== +* + CALL XERBLA( 'SHSEQR', -INFO ) + RETURN +* + ELSE IF( N.EQ.0 ) THEN +* +* ==== Quick return in case N = 0; nothing to do. ==== +* + RETURN +* + ELSE IF( LQUERY ) THEN +* +* ==== Quick return in case of a workspace query ==== +* + CALL SLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, ILO, + $ IHI, Z, LDZ, WORK, LWORK, INFO ) +* ==== Ensure reported workspace size is backward-compatible with +* . previous LAPACK versions. ==== + WORK( 1 ) = MAX( REAL( MAX( 1, N ) ), WORK( 1 ) ) + RETURN +* + ELSE +* +* ==== copy eigenvalues isolated by SGEBAL ==== +* + DO 10 I = 1, ILO - 1 + WR( I ) = H( I, I ) + WI( I ) = ZERO + 10 CONTINUE + DO 20 I = IHI + 1, N + WR( I ) = H( I, I ) + WI( I ) = ZERO + 20 CONTINUE +* +* ==== Initialize Z, if requested ==== +* + IF( INITZ ) + $ CALL SLASET( 'A', N, N, ZERO, ONE, Z, LDZ ) +* +* ==== Quick return if possible ==== +* + IF( ILO.EQ.IHI ) THEN + WR( ILO ) = H( ILO, ILO ) + WI( ILO ) = ZERO + RETURN + END IF +* +* ==== SLAHQR/SLAQR0 crossover point ==== +* + NMIN = ILAENV( 1, 'SHSEQR', JOB( : 1 ) // COMPZ( : 1 ), N, ILO, + $ IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== SLAQR0 for big matrices; SLAHQR for small ones ==== +* + IF( N.GT.NMIN ) THEN + CALL SLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, ILO, + $ IHI, Z, LDZ, WORK, LWORK, INFO ) + ELSE +* +* ==== Small matrix ==== +* + CALL SLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, ILO, + $ IHI, Z, LDZ, INFO ) +* + IF( INFO.GT.0 ) THEN +* +* ==== A rare SLAHQR failure! SLAQR0 sometimes succeeds +* . when SLAHQR fails. ==== +* + KBOT = INFO +* + IF( N.GE.NL ) THEN +* +* ==== Larger matrices have enough subdiagonal scratch +* . space to call SLAQR0 directly. ==== +* + CALL SLAQR0( WANTT, WANTZ, N, ILO, KBOT, H, LDH, WR, + $ WI, ILO, IHI, Z, LDZ, WORK, LWORK, INFO ) +* + ELSE +* +* ==== Tiny matrices don't have enough subdiagonal +* . scratch space to benefit from SLAQR0. Hence, +* . tiny matrices must be copied into a larger +* . array before calling SLAQR0. 
==== +* + CALL SLACPY( 'A', N, N, H, LDH, HL, NL ) + HL( N+1, N ) = ZERO + CALL SLASET( 'A', NL, NL-N, ZERO, ZERO, HL( 1, N+1 ), + $ NL ) + CALL SLAQR0( WANTT, WANTZ, NL, ILO, KBOT, HL, NL, WR, + $ WI, ILO, IHI, Z, LDZ, WORKL, NL, INFO ) + IF( WANTT .OR. INFO.NE.0 ) + $ CALL SLACPY( 'A', N, N, HL, NL, H, LDH ) + END IF + END IF + END IF +* +* ==== Clear out the trash, if necessary. ==== +* + IF( ( WANTT .OR. INFO.NE.0 ) .AND. N.GT.2 ) + $ CALL SLASET( 'L', N-2, N-2, ZERO, ZERO, H( 3, 1 ), LDH ) +* +* ==== Ensure reported workspace size is backward-compatible with +* . previous LAPACK versions. ==== +* + WORK( 1 ) = MAX( REAL( MAX( 1, N ) ), WORK( 1 ) ) + END IF +* +* ==== End of SHSEQR ==== +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slabad.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slabad.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,55 @@ + SUBROUTINE SLABAD( SMALL, LARGE ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + REAL LARGE, SMALL +* .. +* +* Purpose +* ======= +* +* SLABAD takes as input the values computed by SLAMCH for underflow and +* overflow, and returns the square root of each of these values if the +* log of LARGE is sufficiently large. This subroutine is intended to +* identify machines with a large exponent range, such as the Crays, and +* redefine the underflow and overflow limits to be the square roots of +* the values computed by SLAMCH. This subroutine is needed because +* SLAMCH does not compensate for poor arithmetic in the upper half of +* the exponent range, as is found on a Cray. +* +* Arguments +* ========= +* +* SMALL (input/output) REAL +* On entry, the underflow threshold as computed by SLAMCH. +* On exit, if LOG10(LARGE) is sufficiently large, the square +* root of SMALL, otherwise unchanged. 
+* +* LARGE (input/output) REAL +* On entry, the overflow threshold as computed by SLAMCH. +* On exit, if LOG10(LARGE) is sufficiently large, the square +* root of LARGE, otherwise unchanged. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC LOG10, SQRT +* .. +* .. Executable Statements .. +* +* If it looks like we're on a Cray, take the square root of +* SMALL and LARGE to avoid overflow and underflow problems. +* + IF( LOG10( LARGE ).GT.2000. ) THEN + SMALL = SQRT( SMALL ) + LARGE = SQRT( LARGE ) + END IF +* + RETURN +* +* End of SLABAD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slabrd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slabrd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,290 @@ + SUBROUTINE SLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, + $ LDY ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), X( LDX, * ), Y( LDY, * ) +* .. +* +* Purpose +* ======= +* +* SLABRD reduces the first NB rows and columns of a real general +* m by n matrix A to upper or lower bidiagonal form by an orthogonal +* transformation Q' * A * P, and returns the matrices X and Y which +* are needed to apply the transformation to the unreduced part of A. +* +* If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower +* bidiagonal form. +* +* This is an auxiliary routine called by SGEBRD +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. +* +* N (input) INTEGER +* The number of columns in the matrix A. +* +* NB (input) INTEGER +* The number of leading rows and columns of A to be reduced. 
+* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the m by n general matrix to be reduced. +* On exit, the first NB rows and columns of the matrix are +* overwritten; the rest of the array is unchanged. +* If m >= n, elements on and below the diagonal in the first NB +* columns, with the array TAUQ, represent the orthogonal +* matrix Q as a product of elementary reflectors; and +* elements above the diagonal in the first NB rows, with the +* array TAUP, represent the orthogonal matrix P as a product +* of elementary reflectors. +* If m < n, elements below the diagonal in the first NB +* columns, with the array TAUQ, represent the orthogonal +* matrix Q as a product of elementary reflectors, and +* elements on and above the diagonal in the first NB rows, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) REAL array, dimension (NB) +* The diagonal elements of the first NB rows and columns of +* the reduced matrix. D(i) = A(i,i). +* +* E (output) REAL array, dimension (NB) +* The off-diagonal elements of the first NB rows and columns of +* the reduced matrix. +* +* TAUQ (output) REAL array dimension (NB) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Q. See Further Details. +* +* TAUP (output) REAL array, dimension (NB) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix P. See Further Details. +* +* X (output) REAL array, dimension (LDX,NB) +* The m-by-nb matrix X required to update the unreduced part +* of A. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= M. +* +* Y (output) REAL array, dimension (LDY,NB) +* The n-by-nb matrix Y required to update the unreduced part +* of A. +* +* LDY (input) INTEGER +* The leading dimension of the array Y. LDY >= N. 
+* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors. +* +* If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in +* A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in +* A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The elements of the vectors v and u together form the m-by-nb matrix +* V and the nb-by-n matrix U' which are needed, with X and Y, to apply +* the transformation to the unreduced part of the matrix, using a block +* update of the form: A := A - V*Y' - X*U'. +* +* The contents of A on exit are illustrated by the following examples +* with nb = 2: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) +* ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) +* ( v1 v2 a a a ) ( v1 1 a a a a ) +* ( v1 v2 a a a ) ( v1 v2 a a a a ) +* ( v1 v2 a a a ) ( v1 v2 a a a a ) +* ( v1 v2 a a a ) +* +* where a denotes an element of the original matrix which is unchanged, +* vi denotes an element of the vector defining H(i), and ui an element +* of the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SLARFG, SSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. 
N.LE.0 ) + $ RETURN +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, NB +* +* Update A(i:m,i) +* + CALL SGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 ) + CALL SGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+1:m,i) +* + CALL SLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL SGEMV( 'Transpose', M-I+1, N-I, ONE, A( I, I+1 ), + $ LDA, A( I, I ), 1, ZERO, Y( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', M-I+1, I-1, ONE, A( I, 1 ), LDA, + $ A( I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', M-I+1, I-1, ONE, X( I, 1 ), LDX, + $ A( I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ), + $ LDA, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL SSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) +* +* Update A(i,i+1:n) +* + CALL SGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA ) + CALL SGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ), + $ LDA, X( I, 1 ), LDX, ONE, A( I, I+1 ), LDA ) +* +* Generate reflection P(i) to annihilate A(i,i+2:n) +* + CALL SLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = A( I, I+1 ) + A( I, I+1 ) = ONE +* +* Compute X(i+1:m,i) +* + CALL SGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', N-I, I, ONE, Y( I+1, 1 ), LDY, + $ A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL SGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL SGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL SGEMV( 'No 
transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL SSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, NB +* +* Update A(i,i:n) +* + CALL SGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA ) + CALL SGEMV( 'Transpose', I-1, N-I+1, -ONE, A( 1, I ), LDA, + $ X( I, 1 ), LDX, ONE, A( I, I ), LDA ) +* +* Generate reflection P(i) to annihilate A(i,i+1:n) +* + CALL SLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = A( I, I ) + IF( I.LT.M ) THEN + A( I, I ) = ONE +* +* Compute X(i+1:m,i) +* + CALL SGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', N-I+1, I-1, ONE, Y( I, 1 ), LDY, + $ A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL SGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL SGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL SGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL SSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) +* +* Update A(i+1:m,i) +* + CALL SGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 ) + CALL SGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+2:m,i) +* + CALL SLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL SGEMV( 'Transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I+1, I ), 1, ZERO, Y( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', M-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + 
CALL SGEMV( 'Transpose', M-I, I, ONE, X( I+1, 1 ), LDX, + $ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'Transpose', I, N-I, -ONE, A( 1, I+1 ), LDA, + $ Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL SSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) + END IF + 20 CONTINUE + END IF + RETURN +* +* End of SLABRD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slacn2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slacn2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,214 @@ + SUBROUTINE SLACN2( N, V, X, ISGN, EST, KASE, ISAVE ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER KASE, N + REAL EST +* .. +* .. Array Arguments .. + INTEGER ISGN( * ), ISAVE( 3 ) + REAL V( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* SLACN2 estimates the 1-norm of a square, real matrix A. +* Reverse communication is used for evaluating matrix-vector products. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 1. +* +* V (workspace) REAL array, dimension (N) +* On the final return, V = A*W, where EST = norm(V)/norm(W) +* (W is not returned). +* +* X (input/output) REAL array, dimension (N) +* On an intermediate return, X should be overwritten by +* A * X, if KASE=1, +* A' * X, if KASE=2, +* and SLACN2 must be re-called with all the other parameters +* unchanged. +* +* ISGN (workspace) INTEGER array, dimension (N) +* +* EST (input/output) REAL +* On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be +* unchanged from the previous call to SLACN2. +* On exit, EST is an estimate (a lower bound) for norm(A). +* +* KASE (input/output) INTEGER +* On the initial call to SLACN2, KASE should be 0. +* On an intermediate return, KASE will be 1 or 2, indicating +* whether X should be overwritten by A * X or A' * X. +* On the final return from SLACN2, KASE will again be 0. 
+* +* ISAVE (input/output) INTEGER array, dimension (3) +* ISAVE is used to save variables between calls to SLACN2 +* +* Further Details +* ======= ======= +* +* Contributed by Nick Higham, University of Manchester. +* Originally named SONEST, dated March 16, 1988. +* +* Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of +* a real or complex matrix, with applications to condition estimation", +* ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +* +* This is a thread safe version of SLACON, which uses the array ISAVE +* in place of a SAVE statement, as follows: +* +* SLACON SLACN2 +* JUMP ISAVE(1) +* J ISAVE(2) +* ITER ISAVE(3) +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, JLAST + REAL ALTSGN, ESTOLD, TEMP +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SASUM + EXTERNAL ISAMAX, SASUM +* .. +* .. External Subroutines .. + EXTERNAL SCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, NINT, REAL, SIGN +* .. +* .. Executable Statements .. +* + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = ONE / REAL( N ) + 10 CONTINUE + KASE = 1 + ISAVE( 1 ) = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 110, 140 )ISAVE( 1 ) +* +* ................ ENTRY (ISAVE( 1 ) = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 150 + END IF + EST = SASUM( N, X, 1 ) +* + DO 30 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 30 CONTINUE + KASE = 2 + ISAVE( 1 ) = 2 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 40 CONTINUE + ISAVE( 2 ) = ISAMAX( N, X, 1 ) + ISAVE( 3 ) = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. 
+* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = ZERO + 60 CONTINUE + X( ISAVE( 2 ) ) = ONE + KASE = 1 + ISAVE( 1 ) = 3 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL SCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = SASUM( N, V, 1 ) + DO 80 I = 1, N + IF( NINT( SIGN( ONE, X( I ) ) ).NE.ISGN( I ) ) + $ GO TO 90 + 80 CONTINUE +* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. + GO TO 120 +* + 90 CONTINUE +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 120 +* + DO 100 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 100 CONTINUE + KASE = 2 + ISAVE( 1 ) = 4 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 4) +* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 110 CONTINUE + JLAST = ISAVE( 2 ) + ISAVE( 2 ) = ISAMAX( N, X, 1 ) + IF( ( X( JLAST ).NE.ABS( X( ISAVE( 2 ) ) ) ) .AND. + $ ( ISAVE( 3 ).LT.ITMAX ) ) THEN + ISAVE( 3 ) = ISAVE( 3 ) + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 120 CONTINUE + ALTSGN = ONE + DO 130 I = 1, N + X( I ) = ALTSGN*( ONE+REAL( I-1 ) / REAL( N-1 ) ) + ALTSGN = -ALTSGN + 130 CONTINUE + KASE = 1 + ISAVE( 1 ) = 5 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 140 CONTINUE + TEMP = TWO*( SASUM( N, X, 1 ) / REAL( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL SCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 150 CONTINUE + KASE = 0 + RETURN +* +* End of SLACN2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slacon.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slacon.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,205 @@ + SUBROUTINE SLACON( N, V, X, ISGN, EST, KASE ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER KASE, N + REAL EST +* .. +* .. Array Arguments .. + INTEGER ISGN( * ) + REAL V( * ), X( * ) +* .. 
+* +* Purpose +* ======= +* +* SLACON estimates the 1-norm of a square, real matrix A. +* Reverse communication is used for evaluating matrix-vector products. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 1. +* +* V (workspace) REAL array, dimension (N) +* On the final return, V = A*W, where EST = norm(V)/norm(W) +* (W is not returned). +* +* X (input/output) REAL array, dimension (N) +* On an intermediate return, X should be overwritten by +* A * X, if KASE=1, +* A' * X, if KASE=2, +* and SLACON must be re-called with all the other parameters +* unchanged. +* +* ISGN (workspace) INTEGER array, dimension (N) +* +* EST (input/output) REAL +* On entry with KASE = 1 or 2 and JUMP = 3, EST should be +* unchanged from the previous call to SLACON. +* On exit, EST is an estimate (a lower bound) for norm(A). +* +* KASE (input/output) INTEGER +* On the initial call to SLACON, KASE should be 0. +* On an intermediate return, KASE will be 1 or 2, indicating +* whether X should be overwritten by A * X or A' * X. +* On the final return from SLACON, KASE will again be 0. +* +* Further Details +* ======= ======= +* +* Contributed by Nick Higham, University of Manchester. +* Originally named SONEST, dated March 16, 1988. +* +* Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of +* a real or complex matrix, with applications to condition estimation", +* ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITER, J, JLAST, JUMP + REAL ALTSGN, ESTOLD, TEMP +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SASUM + EXTERNAL ISAMAX, SASUM +* .. +* .. External Subroutines .. + EXTERNAL SCOPY +* .. +* .. Intrinsic Functions .. 
+ INTRINSIC ABS, NINT, REAL, SIGN +* .. +* .. Save statement .. + SAVE +* .. +* .. Executable Statements .. +* + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = ONE / REAL( N ) + 10 CONTINUE + KASE = 1 + JUMP = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 110, 140 )JUMP +* +* ................ ENTRY (JUMP = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 150 + END IF + EST = SASUM( N, X, 1 ) +* + DO 30 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 30 CONTINUE + KASE = 2 + JUMP = 2 + RETURN +* +* ................ ENTRY (JUMP = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 40 CONTINUE + J = ISAMAX( N, X, 1 ) + ITER = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = ZERO + 60 CONTINUE + X( J ) = ONE + KASE = 1 + JUMP = 3 + RETURN +* +* ................ ENTRY (JUMP = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL SCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = SASUM( N, V, 1 ) + DO 80 I = 1, N + IF( NINT( SIGN( ONE, X( I ) ) ).NE.ISGN( I ) ) + $ GO TO 90 + 80 CONTINUE +* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. + GO TO 120 +* + 90 CONTINUE +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 120 +* + DO 100 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 100 CONTINUE + KASE = 2 + JUMP = 4 + RETURN +* +* ................ ENTRY (JUMP = 4) +* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 110 CONTINUE + JLAST = J + J = ISAMAX( N, X, 1 ) + IF( ( X( JLAST ).NE.ABS( X( J ) ) ) .AND. ( ITER.LT.ITMAX ) ) THEN + ITER = ITER + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 120 CONTINUE + ALTSGN = ONE + DO 130 I = 1, N + X( I ) = ALTSGN*( ONE+REAL( I-1 ) / REAL( N-1 ) ) + ALTSGN = -ALTSGN + 130 CONTINUE + KASE = 1 + JUMP = 5 + RETURN +* +* ................ ENTRY (JUMP = 5) +* X HAS BEEN OVERWRITTEN BY A*X. 
+* + 140 CONTINUE + TEMP = TWO*( SASUM( N, X, 1 ) / REAL( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL SCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 150 CONTINUE + KASE = 0 + RETURN +* +* End of SLACON +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slacpy.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slacpy.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,87 @@ + SUBROUTINE SLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SLACPY copies all or part of a two-dimensional matrix A to another +* matrix B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the part of the matrix A to be copied to B. +* = 'U': Upper triangular part +* = 'L': Lower triangular part +* Otherwise: All of the matrix A +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The m by n matrix A. If UPLO = 'U', only the upper triangle +* or trapezoid is accessed; if UPLO = 'L', only the lower +* triangle or trapezoid is accessed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (output) REAL array, dimension (LDB,N) +* On exit, B = A in the locations specified by UPLO. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. 
+* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF + RETURN +* +* End of SLACPY +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sladiv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sladiv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,62 @@ + SUBROUTINE SLADIV( A, B, C, D, P, Q ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + REAL A, B, C, D, P, Q +* .. +* +* Purpose +* ======= +* +* SLADIV performs complex division in real arithmetic +* +* a + i*b +* p + i*q = --------- +* c + i*d +* +* The algorithm is due to Robert L. Smith and can be found +* in D. Knuth, The art of Computer Programming, Vol.2, p.195 +* +* Arguments +* ========= +* +* A (input) REAL +* B (input) REAL +* C (input) REAL +* D (input) REAL +* The scalars a, b, c, and d in the above expression. +* +* P (output) REAL +* Q (output) REAL +* The scalars p and q in the above expression. +* +* ===================================================================== +* +* .. Local Scalars .. + REAL E, F +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. 
+* + IF( ABS( D ).LT.ABS( C ) ) THEN + E = D / C + F = C + D*E + P = ( A+B*E ) / F + Q = ( B-A*E ) / F + ELSE + E = C / D + F = D + C*E + P = ( B+A*E ) / F + Q = ( -A+B*E ) / F + END IF +* + RETURN +* +* End of SLADIV +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slae2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slae2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,123 @@ + SUBROUTINE SLAE2( A, B, C, RT1, RT2 ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + REAL A, B, C, RT1, RT2 +* .. +* +* Purpose +* ======= +* +* SLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix +* [ A B ] +* [ B C ]. +* On return, RT1 is the eigenvalue of larger absolute value, and RT2 +* is the eigenvalue of smaller absolute value. +* +* Arguments +* ========= +* +* A (input) REAL +* The (1,1) element of the 2-by-2 matrix. +* +* B (input) REAL +* The (1,2) and (2,1) elements of the 2-by-2 matrix. +* +* C (input) REAL +* The (2,2) element of the 2-by-2 matrix. +* +* RT1 (output) REAL +* The eigenvalue of larger absolute value. +* +* RT2 (output) REAL +* The eigenvalue of smaller absolute value. +* +* Further Details +* =============== +* +* RT1 is accurate to a few ulps barring over/underflow. +* +* RT2 may be inaccurate if there is massive cancellation in the +* determinant A*C-B*B; higher precision or correctly rounded or +* correctly truncated arithmetic would be needed to compute RT2 +* accurately in all cases. +* +* Overflow is possible only if RT1 is within a factor of 5 of overflow. +* Underflow is harmless if the input data is 0 or exceeds +* underflow_threshold / macheps. +* +* ===================================================================== +* +* .. Parameters .. 
+ REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL HALF + PARAMETER ( HALF = 0.5E0 ) +* .. +* .. Local Scalars .. + REAL AB, ACMN, ACMX, ADF, DF, RT, SM, TB +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* +* Compute the eigenvalues +* + SM = A + C + DF = A - C + ADF = ABS( DF ) + TB = B + B + AB = ABS( TB ) + IF( ABS( A ).GT.ABS( C ) ) THEN + ACMX = A + ACMN = C + ELSE + ACMX = C + ACMN = A + END IF + IF( ADF.GT.AB ) THEN + RT = ADF*SQRT( ONE+( AB / ADF )**2 ) + ELSE IF( ADF.LT.AB ) THEN + RT = AB*SQRT( ONE+( ADF / AB )**2 ) + ELSE +* +* Includes case AB=ADF=0 +* + RT = AB*SQRT( TWO ) + END IF + IF( SM.LT.ZERO ) THEN + RT1 = HALF*( SM-RT ) +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE IF( SM.GT.ZERO ) THEN + RT1 = HALF*( SM+RT ) +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE +* +* Includes case RT1 = RT2 = 0 +* + RT1 = HALF*RT + RT2 = -HALF*RT + END IF + RETURN +* +* End of SLAE2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaed6.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaed6.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,327 @@ + SUBROUTINE SLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO ) +* +* -- LAPACK routine (version 3.1.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* February 2007 +* +* .. Scalar Arguments .. + LOGICAL ORGATI + INTEGER INFO, KNITER + REAL FINIT, RHO, TAU +* .. +* .. Array Arguments .. + REAL D( 3 ), Z( 3 ) +* .. 
+* +* Purpose +* ======= +* +* SLAED6 computes the positive or negative root (closest to the origin) +* of +* z(1) z(2) z(3) +* f(x) = rho + --------- + ---------- + --------- +* d(1)-x d(2)-x d(3)-x +* +* It is assumed that +* +* if ORGATI = .true. the root is between d(2) and d(3); +* otherwise it is between d(1) and d(2) +* +* This routine will be called by SLAED4 when necessary. In most cases, +* the root sought is the smallest in magnitude, though it might not be +* in some extremely rare situations. +* +* Arguments +* ========= +* +* KNITER (input) INTEGER +* Refer to SLAED4 for its significance. +* +* ORGATI (input) LOGICAL +* If ORGATI is true, the needed root is between d(2) and +* d(3); otherwise it is between d(1) and d(2). See +* SLAED4 for further details. +* +* RHO (input) REAL +* Refer to the equation f(x) above. +* +* D (input) REAL array, dimension (3) +* D satisfies d(1) < d(2) < d(3). +* +* Z (input) REAL array, dimension (3) +* Each of the elements in z must be positive. +* +* FINIT (input) REAL +* The value of f at 0. It is more accurate than the one +* evaluated inside this routine (if someone wants to do +* so). +* +* TAU (output) REAL +* The root of the equation f(x). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = 1, failure to converge +* +* Further Details +* =============== +* +* 30/06/99: Based on contributions by +* Ren-Cang Li, Computer Science Division, University of California +* at Berkeley, USA +* +* 10/02/03: This version has a few statements commented out for thread safety +* (machine parameters are computed on each entry). SJH. +* +* 05/10/06: Modified from a new version of Ren-Cang Li, use +* Gragg-Thornton-Warner cubic convergent scheme for better stability. +* +* ===================================================================== +* +* .. Parameters .. 
+ INTEGER MAXIT + PARAMETER ( MAXIT = 40 ) + REAL ZERO, ONE, TWO, THREE, FOUR, EIGHT + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ THREE = 3.0E0, FOUR = 4.0E0, EIGHT = 8.0E0 ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Local Arrays .. + REAL DSCALE( 3 ), ZSCALE( 3 ) +* .. +* .. Local Scalars .. + LOGICAL SCALE + INTEGER I, ITER, NITER + REAL A, B, BASE, C, DDF, DF, EPS, ERRETM, ETA, F, + $ FC, SCLFAC, SCLINV, SMALL1, SMALL2, SMINV1, + $ SMINV2, TEMP, TEMP1, TEMP2, TEMP3, TEMP4, + $ LBD, UBD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* + IF( ORGATI ) THEN + LBD = D(2) + UBD = D(3) + ELSE + LBD = D(1) + UBD = D(2) + END IF + IF( FINIT .LT. ZERO )THEN + LBD = ZERO + ELSE + UBD = ZERO + END IF +* + NITER = 1 + TAU = ZERO + IF( KNITER.EQ.2 ) THEN + IF( ORGATI ) THEN + TEMP = ( D( 3 )-D( 2 ) ) / TWO + C = RHO + Z( 1 ) / ( ( D( 1 )-D( 2 ) )-TEMP ) + A = C*( D( 2 )+D( 3 ) ) + Z( 2 ) + Z( 3 ) + B = C*D( 2 )*D( 3 ) + Z( 2 )*D( 3 ) + Z( 3 )*D( 2 ) + ELSE + TEMP = ( D( 1 )-D( 2 ) ) / TWO + C = RHO + Z( 3 ) / ( ( D( 3 )-D( 2 ) )-TEMP ) + A = C*( D( 1 )+D( 2 ) ) + Z( 1 ) + Z( 2 ) + B = C*D( 1 )*D( 2 ) + Z( 1 )*D( 2 ) + Z( 2 )*D( 1 ) + END IF + TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) ) + A = A / TEMP + B = B / TEMP + C = C / TEMP + IF( C.EQ.ZERO ) THEN + TAU = B / A + ELSE IF( A.LE.ZERO ) THEN + TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + IF( TAU .LT. LBD .OR. TAU .GT. UBD ) + $ TAU = ( LBD+UBD )/TWO + IF( D(1).EQ.TAU .OR. D(2).EQ.TAU .OR. D(3).EQ.TAU ) THEN + TAU = ZERO + ELSE + TEMP = FINIT + TAU*Z(1)/( D(1)*( D( 1 )-TAU ) ) + + $ TAU*Z(2)/( D(2)*( D( 2 )-TAU ) ) + + $ TAU*Z(3)/( D(3)*( D( 3 )-TAU ) ) + IF( TEMP .LE. 
ZERO )THEN + LBD = TAU + ELSE + UBD = TAU + END IF + IF( ABS( FINIT ).LE.ABS( TEMP ) ) + $ TAU = ZERO + END IF + END IF +* +* get machine parameters for possible scaling to avoid overflow +* +* modified by Sven: parameters SMALL1, SMINV1, SMALL2, +* SMINV2, EPS are not SAVEd anymore between one call to the +* others but recomputed at each call +* + EPS = SLAMCH( 'Epsilon' ) + BASE = SLAMCH( 'Base' ) + SMALL1 = BASE**( INT( LOG( SLAMCH( 'SafMin' ) ) / LOG( BASE ) / + $ THREE ) ) + SMINV1 = ONE / SMALL1 + SMALL2 = SMALL1*SMALL1 + SMINV2 = SMINV1*SMINV1 +* +* Determine if scaling of inputs necessary to avoid overflow +* when computing 1/TEMP**3 +* + IF( ORGATI ) THEN + TEMP = MIN( ABS( D( 2 )-TAU ), ABS( D( 3 )-TAU ) ) + ELSE + TEMP = MIN( ABS( D( 1 )-TAU ), ABS( D( 2 )-TAU ) ) + END IF + SCALE = .FALSE. + IF( TEMP.LE.SMALL1 ) THEN + SCALE = .TRUE. + IF( TEMP.LE.SMALL2 ) THEN +* +* Scale up by power of radix nearest 1/SAFMIN**(2/3) +* + SCLFAC = SMINV2 + SCLINV = SMALL2 + ELSE +* +* Scale up by power of radix nearest 1/SAFMIN**(1/3) +* + SCLFAC = SMINV1 + SCLINV = SMALL1 + END IF +* +* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) +* + DO 10 I = 1, 3 + DSCALE( I ) = D( I )*SCLFAC + ZSCALE( I ) = Z( I )*SCLFAC + 10 CONTINUE + TAU = TAU*SCLFAC + LBD = LBD*SCLFAC + UBD = UBD*SCLFAC + ELSE +* +* Copy D and Z to DSCALE and ZSCALE +* + DO 20 I = 1, 3 + DSCALE( I ) = D( I ) + ZSCALE( I ) = Z( I ) + 20 CONTINUE + END IF +* + FC = ZERO + DF = ZERO + DDF = ZERO + DO 30 I = 1, 3 + TEMP = ONE / ( DSCALE( I )-TAU ) + TEMP1 = ZSCALE( I )*TEMP + TEMP2 = TEMP1*TEMP + TEMP3 = TEMP2*TEMP + FC = FC + TEMP1 / DSCALE( I ) + DF = DF + TEMP2 + DDF = DDF + TEMP3 + 30 CONTINUE + F = FINIT + TAU*FC +* + IF( ABS( F ).LE.ZERO ) + $ GO TO 60 + IF( F .LE. 
ZERO )THEN + LBD = TAU + ELSE + UBD = TAU + END IF +* +* Iteration begins -- Use Gragg-Thornton-Warner cubic convergent +* scheme +* +* It is not hard to see that +* +* 1) Iterations will go up monotonically +* if FINIT < 0; +* +* 2) Iterations will go down monotonically +* if FINIT > 0. +* + ITER = NITER + 1 +* + DO 50 NITER = ITER, MAXIT +* + IF( ORGATI ) THEN + TEMP1 = DSCALE( 2 ) - TAU + TEMP2 = DSCALE( 3 ) - TAU + ELSE + TEMP1 = DSCALE( 1 ) - TAU + TEMP2 = DSCALE( 2 ) - TAU + END IF + A = ( TEMP1+TEMP2 )*F - TEMP1*TEMP2*DF + B = TEMP1*TEMP2*F + C = F - ( TEMP1+TEMP2 )*DF + TEMP1*TEMP2*DDF + TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) ) + A = A / TEMP + B = B / TEMP + C = C / TEMP + IF( C.EQ.ZERO ) THEN + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + IF( F*ETA.GE.ZERO ) THEN + ETA = -F / DF + END IF +* + TAU = TAU + ETA + IF( TAU .LT. LBD .OR. TAU .GT. UBD ) + $ TAU = ( LBD + UBD )/TWO +* + FC = ZERO + ERRETM = ZERO + DF = ZERO + DDF = ZERO + DO 40 I = 1, 3 + TEMP = ONE / ( DSCALE( I )-TAU ) + TEMP1 = ZSCALE( I )*TEMP + TEMP2 = TEMP1*TEMP + TEMP3 = TEMP2*TEMP + TEMP4 = TEMP1 / DSCALE( I ) + FC = FC + TEMP4 + ERRETM = ERRETM + ABS( TEMP4 ) + DF = DF + TEMP2 + DDF = DDF + TEMP3 + 40 CONTINUE + F = FINIT + TAU*FC + ERRETM = EIGHT*( ABS( FINIT )+ABS( TAU )*ERRETM ) + + $ ABS( TAU )*DF + IF( ABS( F ).LE.EPS*ERRETM ) + $ GO TO 60 + IF( F .LE. ZERO )THEN + LBD = TAU + ELSE + UBD = TAU + END IF + 50 CONTINUE + INFO = 1 + 60 CONTINUE +* +* Undo scaling +* + IF( SCALE ) + $ TAU = TAU*SCLINV + RETURN +* +* End of SLAED6 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaev2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaev2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,169 @@ + SUBROUTINE SLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + REAL A, B, C, CS1, RT1, RT2, SN1 +* .. +* +* Purpose +* ======= +* +* SLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix +* [ A B ] +* [ B C ]. +* On return, RT1 is the eigenvalue of larger absolute value, RT2 is the +* eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right +* eigenvector for RT1, giving the decomposition +* +* [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] +* [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. +* +* Arguments +* ========= +* +* A (input) REAL +* The (1,1) element of the 2-by-2 matrix. +* +* B (input) REAL +* The (1,2) element and the conjugate of the (2,1) element of +* the 2-by-2 matrix. +* +* C (input) REAL +* The (2,2) element of the 2-by-2 matrix. +* +* RT1 (output) REAL +* The eigenvalue of larger absolute value. +* +* RT2 (output) REAL +* The eigenvalue of smaller absolute value. +* +* CS1 (output) REAL +* SN1 (output) REAL +* The vector (CS1, SN1) is a unit right eigenvector for RT1. +* +* Further Details +* =============== +* +* RT1 is accurate to a few ulps barring over/underflow. +* +* RT2 may be inaccurate if there is massive cancellation in the +* determinant A*C-B*B; higher precision or correctly rounded or +* correctly truncated arithmetic would be needed to compute RT2 +* accurately in all cases. +* +* CS1 and SN1 are accurate to a few ulps barring over/underflow. +* +* Overflow is possible only if RT1 is within a factor of 5 of overflow. +* Underflow is harmless if the input data is 0 or exceeds +* underflow_threshold / macheps. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL HALF + PARAMETER ( HALF = 0.5E0 ) +* .. +* .. Local Scalars .. + INTEGER SGN1, SGN2 + REAL AB, ACMN, ACMX, ACS, ADF, CS, CT, DF, RT, SM, + $ TB, TN +* .. +* .. 
Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* +* Compute the eigenvalues +* + SM = A + C + DF = A - C + ADF = ABS( DF ) + TB = B + B + AB = ABS( TB ) + IF( ABS( A ).GT.ABS( C ) ) THEN + ACMX = A + ACMN = C + ELSE + ACMX = C + ACMN = A + END IF + IF( ADF.GT.AB ) THEN + RT = ADF*SQRT( ONE+( AB / ADF )**2 ) + ELSE IF( ADF.LT.AB ) THEN + RT = AB*SQRT( ONE+( ADF / AB )**2 ) + ELSE +* +* Includes case AB=ADF=0 +* + RT = AB*SQRT( TWO ) + END IF + IF( SM.LT.ZERO ) THEN + RT1 = HALF*( SM-RT ) + SGN1 = -1 +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE IF( SM.GT.ZERO ) THEN + RT1 = HALF*( SM+RT ) + SGN1 = 1 +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE +* +* Includes case RT1 = RT2 = 0 +* + RT1 = HALF*RT + RT2 = -HALF*RT + SGN1 = 1 + END IF +* +* Compute the eigenvector +* + IF( DF.GE.ZERO ) THEN + CS = DF + RT + SGN2 = 1 + ELSE + CS = DF - RT + SGN2 = -1 + END IF + ACS = ABS( CS ) + IF( ACS.GT.AB ) THEN + CT = -TB / CS + SN1 = ONE / SQRT( ONE+CT*CT ) + CS1 = CT*SN1 + ELSE + IF( AB.EQ.ZERO ) THEN + CS1 = ONE + SN1 = ZERO + ELSE + TN = -CS / TB + CS1 = ONE / SQRT( ONE+TN*TN ) + SN1 = TN*CS1 + END IF + END IF + IF( SGN1.EQ.SGN2 ) THEN + TN = CS1 + CS1 = -SN1 + SN1 = TN + END IF + RETURN +* +* End of SLAEV2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaexc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaexc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,353 @@ + SUBROUTINE SLAEXC( WANTQ, N, T, LDT, Q, LDQ, J1, N1, N2, WORK, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. 
+ LOGICAL WANTQ + INTEGER INFO, J1, LDQ, LDT, N, N1, N2 +* .. +* .. Array Arguments .. + REAL Q( LDQ, * ), T( LDT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in +* an upper quasi-triangular matrix T by an orthogonal similarity +* transformation. +* +* T must be in Schur canonical form, that is, block upper triangular +* with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block +* has its diagonal elemnts equal and its off-diagonal elements of +* opposite sign. +* +* Arguments +* ========= +* +* WANTQ (input) LOGICAL +* = .TRUE. : accumulate the transformation in the matrix Q; +* = .FALSE.: do not accumulate the transformation. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) REAL array, dimension (LDT,N) +* On entry, the upper quasi-triangular matrix T, in Schur +* canonical form. +* On exit, the updated matrix T, again in Schur canonical form. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) REAL array, dimension (LDQ,N) +* On entry, if WANTQ is .TRUE., the orthogonal matrix Q. +* On exit, if WANTQ is .TRUE., the updated matrix Q. +* If WANTQ is .FALSE., Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N. +* +* J1 (input) INTEGER +* The index of the first row of the first block T11. +* +* N1 (input) INTEGER +* The order of the first block T11. N1 = 0, 1 or 2. +* +* N2 (input) INTEGER +* The order of the second block T22. N2 = 0, 1 or 2. +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* = 1: the transformed matrix T would be too far from Schur +* form; the blocks are not swapped and T and Q are +* unchanged. +* +* ===================================================================== +* +* .. Parameters .. 
+ REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL TEN + PARAMETER ( TEN = 1.0E+1 ) + INTEGER LDD, LDX + PARAMETER ( LDD = 4, LDX = 2 ) +* .. +* .. Local Scalars .. + INTEGER IERR, J2, J3, J4, K, ND + REAL CS, DNORM, EPS, SCALE, SMLNUM, SN, T11, T22, + $ T33, TAU, TAU1, TAU2, TEMP, THRESH, WI1, WI2, + $ WR1, WR2, XNORM +* .. +* .. Local Arrays .. + REAL D( LDD, 4 ), U( 3 ), U1( 3 ), U2( 3 ), + $ X( LDX, 2 ) +* .. +* .. External Functions .. + REAL SLAMCH, SLANGE + EXTERNAL SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLANV2, SLARFG, SLARFX, SLARTG, SLASY2, + $ SROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 .OR. N1.EQ.0 .OR. N2.EQ.0 ) + $ RETURN + IF( J1+N1.GT.N ) + $ RETURN +* + J2 = J1 + 1 + J3 = J1 + 2 + J4 = J1 + 3 +* + IF( N1.EQ.1 .AND. N2.EQ.1 ) THEN +* +* Swap two 1-by-1 blocks. +* + T11 = T( J1, J1 ) + T22 = T( J2, J2 ) +* +* Determine the transformation to perform the interchange. +* + CALL SLARTG( T( J1, J2 ), T22-T11, CS, SN, TEMP ) +* +* Apply transformation to the matrix T. +* + IF( J3.LE.N ) + $ CALL SROT( N-J1-1, T( J1, J3 ), LDT, T( J2, J3 ), LDT, CS, + $ SN ) + CALL SROT( J1-1, T( 1, J1 ), 1, T( 1, J2 ), 1, CS, SN ) +* + T( J1, J1 ) = T22 + T( J2, J2 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL SROT( N, Q( 1, J1 ), 1, Q( 1, J2 ), 1, CS, SN ) + END IF +* + ELSE +* +* Swapping involves at least one 2-by-2 block. +* +* Copy the diagonal block of order N1+N2 to the local array D +* and compute its norm. +* + ND = N1 + N2 + CALL SLACPY( 'Full', ND, ND, T( J1, J1 ), LDT, D, LDD ) + DNORM = SLANGE( 'Max', ND, ND, D, LDD, WORK ) +* +* Compute machine-dependent threshold for test for accepting +* swap. +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + THRESH = MAX( TEN*EPS*DNORM, SMLNUM ) +* +* Solve T11*X - X*T22 = scale*T12 for X. 
+* + CALL SLASY2( .FALSE., .FALSE., -1, N1, N2, D, LDD, + $ D( N1+1, N1+1 ), LDD, D( 1, N1+1 ), LDD, SCALE, X, + $ LDX, XNORM, IERR ) +* +* Swap the adjacent diagonal blocks. +* + K = N1 + N1 + N2 - 3 + GO TO ( 10, 20, 30 )K +* + 10 CONTINUE +* +* N1 = 1, N2 = 2: generate elementary reflector H so that: +* +* ( scale, X11, X12 ) H = ( 0, 0, * ) +* + U( 1 ) = SCALE + U( 2 ) = X( 1, 1 ) + U( 3 ) = X( 1, 2 ) + CALL SLARFG( 3, U( 3 ), U, 1, TAU ) + U( 3 ) = ONE + T11 = T( J1, J1 ) +* +* Perform swap provisionally on diagonal block in D. +* + CALL SLARFX( 'L', 3, 3, U, TAU, D, LDD, WORK ) + CALL SLARFX( 'R', 3, 3, U, TAU, D, LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 3, 1 ) ), ABS( D( 3, 2 ) ), ABS( D( 3, + $ 3 )-T11 ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. +* + CALL SLARFX( 'L', 3, N-J1+1, U, TAU, T( J1, J1 ), LDT, WORK ) + CALL SLARFX( 'R', J2, 3, U, TAU, T( 1, J1 ), LDT, WORK ) +* + T( J3, J1 ) = ZERO + T( J3, J2 ) = ZERO + T( J3, J3 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL SLARFX( 'R', N, 3, U, TAU, Q( 1, J1 ), LDQ, WORK ) + END IF + GO TO 40 +* + 20 CONTINUE +* +* N1 = 2, N2 = 1: generate elementary reflector H so that: +* +* H ( -X11 ) = ( * ) +* ( -X21 ) = ( 0 ) +* ( scale ) = ( 0 ) +* + U( 1 ) = -X( 1, 1 ) + U( 2 ) = -X( 2, 1 ) + U( 3 ) = SCALE + CALL SLARFG( 3, U( 1 ), U( 2 ), 1, TAU ) + U( 1 ) = ONE + T33 = T( J3, J3 ) +* +* Perform swap provisionally on diagonal block in D. +* + CALL SLARFX( 'L', 3, 3, U, TAU, D, LDD, WORK ) + CALL SLARFX( 'R', 3, 3, U, TAU, D, LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 2, 1 ) ), ABS( D( 3, 1 ) ), ABS( D( 1, + $ 1 )-T33 ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. 
+* + CALL SLARFX( 'R', J3, 3, U, TAU, T( 1, J1 ), LDT, WORK ) + CALL SLARFX( 'L', 3, N-J1, U, TAU, T( J1, J2 ), LDT, WORK ) +* + T( J1, J1 ) = T33 + T( J2, J1 ) = ZERO + T( J3, J1 ) = ZERO +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL SLARFX( 'R', N, 3, U, TAU, Q( 1, J1 ), LDQ, WORK ) + END IF + GO TO 40 +* + 30 CONTINUE +* +* N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so +* that: +* +* H(2) H(1) ( -X11 -X12 ) = ( * * ) +* ( -X21 -X22 ) ( 0 * ) +* ( scale 0 ) ( 0 0 ) +* ( 0 scale ) ( 0 0 ) +* + U1( 1 ) = -X( 1, 1 ) + U1( 2 ) = -X( 2, 1 ) + U1( 3 ) = SCALE + CALL SLARFG( 3, U1( 1 ), U1( 2 ), 1, TAU1 ) + U1( 1 ) = ONE +* + TEMP = -TAU1*( X( 1, 2 )+U1( 2 )*X( 2, 2 ) ) + U2( 1 ) = -TEMP*U1( 2 ) - X( 2, 2 ) + U2( 2 ) = -TEMP*U1( 3 ) + U2( 3 ) = SCALE + CALL SLARFG( 3, U2( 1 ), U2( 2 ), 1, TAU2 ) + U2( 1 ) = ONE +* +* Perform swap provisionally on diagonal block in D. +* + CALL SLARFX( 'L', 3, 4, U1, TAU1, D, LDD, WORK ) + CALL SLARFX( 'R', 4, 3, U1, TAU1, D, LDD, WORK ) + CALL SLARFX( 'L', 3, 4, U2, TAU2, D( 2, 1 ), LDD, WORK ) + CALL SLARFX( 'R', 4, 3, U2, TAU2, D( 1, 2 ), LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 3, 1 ) ), ABS( D( 3, 2 ) ), ABS( D( 4, 1 ) ), + $ ABS( D( 4, 2 ) ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. +* + CALL SLARFX( 'L', 3, N-J1+1, U1, TAU1, T( J1, J1 ), LDT, WORK ) + CALL SLARFX( 'R', J4, 3, U1, TAU1, T( 1, J1 ), LDT, WORK ) + CALL SLARFX( 'L', 3, N-J1+1, U2, TAU2, T( J2, J1 ), LDT, WORK ) + CALL SLARFX( 'R', J4, 3, U2, TAU2, T( 1, J2 ), LDT, WORK ) +* + T( J3, J1 ) = ZERO + T( J3, J2 ) = ZERO + T( J4, J1 ) = ZERO + T( J4, J2 ) = ZERO +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. 
+* + CALL SLARFX( 'R', N, 3, U1, TAU1, Q( 1, J1 ), LDQ, WORK ) + CALL SLARFX( 'R', N, 3, U2, TAU2, Q( 1, J2 ), LDQ, WORK ) + END IF +* + 40 CONTINUE +* + IF( N2.EQ.2 ) THEN +* +* Standardize new 2-by-2 block T11 +* + CALL SLANV2( T( J1, J1 ), T( J1, J2 ), T( J2, J1 ), + $ T( J2, J2 ), WR1, WI1, WR2, WI2, CS, SN ) + CALL SROT( N-J1-1, T( J1, J1+2 ), LDT, T( J2, J1+2 ), LDT, + $ CS, SN ) + CALL SROT( J1-1, T( 1, J1 ), 1, T( 1, J2 ), 1, CS, SN ) + IF( WANTQ ) + $ CALL SROT( N, Q( 1, J1 ), 1, Q( 1, J2 ), 1, CS, SN ) + END IF +* + IF( N1.EQ.2 ) THEN +* +* Standardize new 2-by-2 block T22 +* + J3 = J1 + N2 + J4 = J3 + 1 + CALL SLANV2( T( J3, J3 ), T( J3, J4 ), T( J4, J3 ), + $ T( J4, J4 ), WR1, WI1, WR2, WI2, CS, SN ) + IF( J3+2.LE.N ) + $ CALL SROT( N-J3-1, T( J3, J3+2 ), LDT, T( J4, J3+2 ), + $ LDT, CS, SN ) + CALL SROT( J3-1, T( 1, J3 ), 1, T( 1, J4 ), 1, CS, SN ) + IF( WANTQ ) + $ CALL SROT( N, Q( 1, J3 ), 1, Q( 1, J4 ), 1, CS, SN ) + END IF +* + END IF + RETURN +* +* Exit with INFO = 1 if swap was rejected. +* + 50 INFO = 1 + RETURN +* +* End of SLAEXC +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slag2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slag2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,300 @@ + SUBROUTINE SLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1, + $ WR2, WI ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB + REAL SAFMIN, SCALE1, SCALE2, WI, WR1, WR2 +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue +* problem A - w B, with scaling as necessary to avoid over-/underflow. 
+* +* The scaling factor "s" results in a modified eigenvalue equation +* +* s A - w B +* +* where s is a non-negative scaling factor chosen so that w, w B, +* and s A do not overflow and, if possible, do not underflow, either. +* +* Arguments +* ========= +* +* A (input) REAL array, dimension (LDA, 2) +* On entry, the 2 x 2 matrix A. It is assumed that its 1-norm +* is less than 1/SAFMIN. Entries less than +* sqrt(SAFMIN)*norm(A) are subject to being treated as zero. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= 2. +* +* B (input) REAL array, dimension (LDB, 2) +* On entry, the 2 x 2 upper triangular matrix B. It is +* assumed that the one-norm of B is less than 1/SAFMIN. The +* diagonals should be at least sqrt(SAFMIN) times the largest +* element of B (in absolute value); if a diagonal is smaller +* than that, then +/- sqrt(SAFMIN) will be used instead of +* that diagonal. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= 2. +* +* SAFMIN (input) REAL +* The smallest positive number s.t. 1/SAFMIN does not +* overflow. (This should always be SLAMCH('S') -- it is an +* argument in order to avoid having to call SLAMCH frequently.) +* +* SCALE1 (output) REAL +* A scaling factor used to avoid over-/underflow in the +* eigenvalue equation which defines the first eigenvalue. If +* the eigenvalues are complex, then the eigenvalues are +* ( WR1 +/- WI i ) / SCALE1 (which may lie outside the +* exponent range of the machine), SCALE1=SCALE2, and SCALE1 +* will always be positive. If the eigenvalues are real, then +* the first (real) eigenvalue is WR1 / SCALE1 , but this may +* overflow or underflow, and in fact, SCALE1 may be zero or +* less than the underflow threshhold if the exact eigenvalue +* is sufficiently large. +* +* SCALE2 (output) REAL +* A scaling factor used to avoid over-/underflow in the +* eigenvalue equation which defines the second eigenvalue. If +* the eigenvalues are complex, then SCALE2=SCALE1. 
If the +* eigenvalues are real, then the second (real) eigenvalue is +* WR2 / SCALE2 , but this may overflow or underflow, and in +* fact, SCALE2 may be zero or less than the underflow +* threshhold if the exact eigenvalue is sufficiently large. +* +* WR1 (output) REAL +* If the eigenvalue is real, then WR1 is SCALE1 times the +* eigenvalue closest to the (2,2) element of A B**(-1). If the +* eigenvalue is complex, then WR1=WR2 is SCALE1 times the real +* part of the eigenvalues. +* +* WR2 (output) REAL +* If the eigenvalue is real, then WR2 is SCALE2 times the +* other eigenvalue. If the eigenvalue is complex, then +* WR1=WR2 is SCALE1 times the real part of the eigenvalues. +* +* WI (output) REAL +* If the eigenvalue is real, then WI is zero. If the +* eigenvalue is complex, then WI is SCALE1 times the imaginary +* part of the eigenvalues. WI will always be non-negative. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) + REAL HALF + PARAMETER ( HALF = ONE / TWO ) + REAL FUZZY1 + PARAMETER ( FUZZY1 = ONE+1.0E-5 ) +* .. +* .. Local Scalars .. + REAL A11, A12, A21, A22, ABI22, ANORM, AS11, AS12, + $ AS22, ASCALE, B11, B12, B22, BINV11, BINV22, + $ BMIN, BNORM, BSCALE, BSIZE, C1, C2, C3, C4, C5, + $ DIFF, DISCR, PP, QQ, R, RTMAX, RTMIN, S1, S2, + $ SAFMAX, SHIFT, SS, SUM, WABS, WBIG, WDET, + $ WSCALE, WSIZE, WSMALL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. 
+* + RTMIN = SQRT( SAFMIN ) + RTMAX = ONE / RTMIN + SAFMAX = ONE / SAFMIN +* +* Scale A +* + ANORM = MAX( ABS( A( 1, 1 ) )+ABS( A( 2, 1 ) ), + $ ABS( A( 1, 2 ) )+ABS( A( 2, 2 ) ), SAFMIN ) + ASCALE = ONE / ANORM + A11 = ASCALE*A( 1, 1 ) + A21 = ASCALE*A( 2, 1 ) + A12 = ASCALE*A( 1, 2 ) + A22 = ASCALE*A( 2, 2 ) +* +* Perturb B if necessary to insure non-singularity +* + B11 = B( 1, 1 ) + B12 = B( 1, 2 ) + B22 = B( 2, 2 ) + BMIN = RTMIN*MAX( ABS( B11 ), ABS( B12 ), ABS( B22 ), RTMIN ) + IF( ABS( B11 ).LT.BMIN ) + $ B11 = SIGN( BMIN, B11 ) + IF( ABS( B22 ).LT.BMIN ) + $ B22 = SIGN( BMIN, B22 ) +* +* Scale B +* + BNORM = MAX( ABS( B11 ), ABS( B12 )+ABS( B22 ), SAFMIN ) + BSIZE = MAX( ABS( B11 ), ABS( B22 ) ) + BSCALE = ONE / BSIZE + B11 = B11*BSCALE + B12 = B12*BSCALE + B22 = B22*BSCALE +* +* Compute larger eigenvalue by method described by C. van Loan +* +* ( AS is A shifted by -SHIFT*B ) +* + BINV11 = ONE / B11 + BINV22 = ONE / B22 + S1 = A11*BINV11 + S2 = A22*BINV22 + IF( ABS( S1 ).LE.ABS( S2 ) ) THEN + AS12 = A12 - S1*B12 + AS22 = A22 - S1*B22 + SS = A21*( BINV11*BINV22 ) + ABI22 = AS22*BINV22 - SS*B12 + PP = HALF*ABI22 + SHIFT = S1 + ELSE + AS12 = A12 - S2*B12 + AS11 = A11 - S2*B11 + SS = A21*( BINV11*BINV22 ) + ABI22 = -SS*B12 + PP = HALF*( AS11*BINV11+ABI22 ) + SHIFT = S2 + END IF + QQ = SS*AS12 + IF( ABS( PP*RTMIN ).GE.ONE ) THEN + DISCR = ( RTMIN*PP )**2 + QQ*SAFMIN + R = SQRT( ABS( DISCR ) )*RTMAX + ELSE + IF( PP**2+ABS( QQ ).LE.SAFMIN ) THEN + DISCR = ( RTMAX*PP )**2 + QQ*SAFMAX + R = SQRT( ABS( DISCR ) )*RTMIN + ELSE + DISCR = PP**2 + QQ + R = SQRT( ABS( DISCR ) ) + END IF + END IF +* +* Note: the test of R in the following IF is to cover the case when +* DISCR is small and negative and is flushed to zero during +* the calculation of R. On machines which have a consistent +* flush-to-zero threshhold and handle numbers above that +* threshhold correctly, it would not be necessary. +* + IF( DISCR.GE.ZERO .OR. 
R.EQ.ZERO ) THEN + SUM = PP + SIGN( R, PP ) + DIFF = PP - SIGN( R, PP ) + WBIG = SHIFT + SUM +* +* Compute smaller eigenvalue +* + WSMALL = SHIFT + DIFF + IF( HALF*ABS( WBIG ).GT.MAX( ABS( WSMALL ), SAFMIN ) ) THEN + WDET = ( A11*A22-A12*A21 )*( BINV11*BINV22 ) + WSMALL = WDET / WBIG + END IF +* +* Choose (real) eigenvalue closest to 2,2 element of A*B**(-1) +* for WR1. +* + IF( PP.GT.ABI22 ) THEN + WR1 = MIN( WBIG, WSMALL ) + WR2 = MAX( WBIG, WSMALL ) + ELSE + WR1 = MAX( WBIG, WSMALL ) + WR2 = MIN( WBIG, WSMALL ) + END IF + WI = ZERO + ELSE +* +* Complex eigenvalues +* + WR1 = SHIFT + PP + WR2 = WR1 + WI = R + END IF +* +* Further scaling to avoid underflow and overflow in computing +* SCALE1 and overflow in computing w*B. +* +* This scale factor (WSCALE) is bounded from above using C1 and C2, +* and from below using C3 and C4. +* C1 implements the condition s A must never overflow. +* C2 implements the condition w B must never overflow. +* C3, with C2, +* implement the condition that s A - w B must never overflow. +* C4 implements the condition s should not underflow. +* C5 implements the condition max(s,|w|) should be at least 2. +* + C1 = BSIZE*( SAFMIN*MAX( ONE, ASCALE ) ) + C2 = SAFMIN*MAX( ONE, BNORM ) + C3 = BSIZE*SAFMIN + IF( ASCALE.LE.ONE .AND. BSIZE.LE.ONE ) THEN + C4 = MIN( ONE, ( ASCALE / SAFMIN )*BSIZE ) + ELSE + C4 = ONE + END IF + IF( ASCALE.LE.ONE .OR. 
BSIZE.LE.ONE ) THEN + C5 = MIN( ONE, ASCALE*BSIZE ) + ELSE + C5 = ONE + END IF +* +* Scale first eigenvalue +* + WABS = ABS( WR1 ) + ABS( WI ) + WSIZE = MAX( SAFMIN, C1, FUZZY1*( WABS*C2+C3 ), + $ MIN( C4, HALF*MAX( WABS, C5 ) ) ) + IF( WSIZE.NE.ONE ) THEN + WSCALE = ONE / WSIZE + IF( WSIZE.GT.ONE ) THEN + SCALE1 = ( MAX( ASCALE, BSIZE )*WSCALE )* + $ MIN( ASCALE, BSIZE ) + ELSE + SCALE1 = ( MIN( ASCALE, BSIZE )*WSCALE )* + $ MAX( ASCALE, BSIZE ) + END IF + WR1 = WR1*WSCALE + IF( WI.NE.ZERO ) THEN + WI = WI*WSCALE + WR2 = WR1 + SCALE2 = SCALE1 + END IF + ELSE + SCALE1 = ASCALE*BSIZE + SCALE2 = SCALE1 + END IF +* +* Scale second eigenvalue (if real) +* + IF( WI.EQ.ZERO ) THEN + WSIZE = MAX( SAFMIN, C1, FUZZY1*( ABS( WR2 )*C2+C3 ), + $ MIN( C4, HALF*MAX( ABS( WR2 ), C5 ) ) ) + IF( WSIZE.NE.ONE ) THEN + WSCALE = ONE / WSIZE + IF( WSIZE.GT.ONE ) THEN + SCALE2 = ( MAX( ASCALE, BSIZE )*WSCALE )* + $ MIN( ASCALE, BSIZE ) + ELSE + SCALE2 = ( MIN( ASCALE, BSIZE )*WSCALE )* + $ MAX( ASCALE, BSIZE ) + END IF + WR2 = WR2*WSCALE + ELSE + SCALE2 = ASCALE*BSIZE + END IF + END IF +* +* End of SLAG2 +* + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slahqr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slahqr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,501 @@ + SUBROUTINE SLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + REAL H( LDH, * ), WI( * ), WR( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SLAHQR is an auxiliary routine called by SHSEQR to update the +* eigenvalues and Schur decomposition already computed by SHSEQR, by +* dealing with the Hessenberg submatrix in rows and columns ILO to +* IHI. 
+* +* Arguments +* ========= +* +* WANTT (input) LOGICAL +* = .TRUE. : the full Schur form T is required; +* = .FALSE.: only eigenvalues are required. +* +* WANTZ (input) LOGICAL +* = .TRUE. : the matrix of Schur vectors Z is required; +* = .FALSE.: Schur vectors are not required. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper quasi-triangular in +* rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless +* ILO = 1). SLAHQR works primarily with the Hessenberg +* submatrix in rows and columns ILO to IHI, but applies +* transformations to all of H if WANTT is .TRUE.. +* 1 <= ILO <= max(1,IHI); IHI <= N. +* +* H (input/output) REAL array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if INFO is zero and if WANTT is .TRUE., H is upper +* quasi-triangular in rows and columns ILO:IHI, with any +* 2-by-2 diagonal blocks in standard form. If INFO is zero +* and WANTT is .FALSE., the contents of H are unspecified on +* exit. The output state of H if INFO is nonzero is given +* below under the description of INFO. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* WR (output) REAL array, dimension (N) +* WI (output) REAL array, dimension (N) +* The real and imaginary parts, respectively, of the computed +* eigenvalues ILO to IHI are stored in the corresponding +* elements of WR and WI. If two eigenvalues are computed as a +* complex conjugate pair, they are stored in consecutive +* elements of WR and WI, say the i-th and (i+1)th, with +* WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the +* eigenvalues are stored in the same order as on the diagonal +* of the Schur form returned in H, with WR(i) = H(i,i), and, if +* H(i:i+1,i:i+1) is a 2-by-2 diagonal block, +* WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). 
+* +* ILOZ (input) INTEGER +* IHIZ (input) INTEGER +* Specify the rows of Z to which transformations must be +* applied if WANTZ is .TRUE.. +* 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. +* +* Z (input/output) REAL array, dimension (LDZ,N) +* If WANTZ is .TRUE., on entry Z must contain the current +* matrix Z of transformations accumulated by SHSEQR, and on +* exit Z has been updated; transformations are applied only to +* the submatrix Z(ILOZ:IHIZ,ILO:IHI). +* If WANTZ is .FALSE., Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* .GT. 0: If INFO = i, SLAHQR failed to compute all the +* eigenvalues ILO to IHI in a total of 30 iterations +* per eigenvalue; elements i+1:ihi of WR and WI +* contain those eigenvalues which have been +* successfully computed. +* +* If INFO .GT. 0 and WANTT is .FALSE., then on exit, +* the remaining unconverged eigenvalues are the +* eigenvalues of the upper Hessenberg matrix rows +* and columns ILO through INFO of the final, output +* value of H. +* +* If INFO .GT. 0 and WANTT is .TRUE., then on exit +* (*) (initial value of H)*U = U*(final value of H) +* where U is an orthogonal matrix. The final +* value of H is upper Hessenberg and triangular in +* rows and columns INFO+1 through IHI. +* +* If INFO .GT. 0 and WANTZ is .TRUE., then on exit +* (final value of Z) = (initial value of Z)*U +* where U is the orthogonal matrix in (*) +* (regardless of the value of WANTT.) +* +* Further Details +* =============== +* +* 02-96 Based on modifications by +* David Day, Sandia National Laboratory, USA +* +* 12-04 Further modifications by +* Ralph Byers, University of Kansas, USA +* +* This is a modified version of SLAHQR from LAPACK version 3.0. +* It is (1) more robust against overflow and underflow and +* (2) adopts the more conservative Ahues & Tisseur stopping +* criterion (LAWN 122, 1997). 
+* +* ========================================================= +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 30 ) + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0, TWO = 2.0e0 ) + REAL DAT1, DAT2 + PARAMETER ( DAT1 = 3.0e0 / 4.0e0, DAT2 = -0.4375e0 ) +* .. +* .. Local Scalars .. + REAL AA, AB, BA, BB, CS, DET, H11, H12, H21, H21S, + $ H22, RT1I, RT1R, RT2I, RT2R, RTDISC, S, SAFMAX, + $ SAFMIN, SMLNUM, SN, SUM, T1, T2, T3, TR, TST, + $ ULP, V2, V3 + INTEGER I, I1, I2, ITS, J, K, L, M, NH, NR, NZ +* .. +* .. Local Arrays .. + REAL V( 3 ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLABAD, SLANV2, SLARFG, SROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( ILO.EQ.IHI ) THEN + WR( ILO ) = H( ILO, ILO ) + WI( ILO ) = ZERO + RETURN + END IF +* +* ==== clear out the trash ==== + DO 10 J = ILO, IHI - 3 + H( J+2, J ) = ZERO + H( J+3, J ) = ZERO + 10 CONTINUE + IF( ILO.LE.IHI-2 ) + $ H( IHI, IHI-2 ) = ZERO +* + NH = IHI - ILO + 1 + NZ = IHIZ - ILOZ + 1 +* +* Set machine-dependent constants for the stopping criterion. +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = ONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( NH ) / ULP ) +* +* I1 and I2 are the indices of the first row and last column of H +* to which transformations must be applied. If eigenvalues only are +* being computed, I1 and I2 are set inside the main loop. +* + IF( WANTT ) THEN + I1 = 1 + I2 = N + END IF +* +* The main loop begins here. I is the loop index and decreases from +* IHI to ILO in steps of 1 or 2. Each iteration of the loop works +* with the active submatrix in rows and columns L to I. +* Eigenvalues I+1 to IHI have already converged. Either L = ILO or +* H(L,L-1) is negligible so that the matrix splits. 
+* + I = IHI + 20 CONTINUE + L = ILO + IF( I.LT.ILO ) + $ GO TO 160 +* +* Perform QR iterations on rows and columns ILO to I until a +* submatrix of order 1 or 2 splits off at the bottom because a +* subdiagonal element has become negligible. +* + DO 140 ITS = 0, ITMAX +* +* Look for a single small subdiagonal element. +* + DO 30 K = I, L + 1, -1 + IF( ABS( H( K, K-1 ) ).LE.SMLNUM ) + $ GO TO 40 + TST = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) ) + IF( TST.EQ.ZERO ) THEN + IF( K-2.GE.ILO ) + $ TST = TST + ABS( H( K-1, K-2 ) ) + IF( K+1.LE.IHI ) + $ TST = TST + ABS( H( K+1, K ) ) + END IF +* ==== The following is a conservative small subdiagonal +* . deflation criterion due to Ahues & Tisseur (LAWN 122, +* . 1997). It has better mathematical foundation and +* . improves accuracy in some cases. ==== + IF( ABS( H( K, K-1 ) ).LE.ULP*TST ) THEN + AB = MAX( ABS( H( K, K-1 ) ), ABS( H( K-1, K ) ) ) + BA = MIN( ABS( H( K, K-1 ) ), ABS( H( K-1, K ) ) ) + AA = MAX( ABS( H( K, K ) ), + $ ABS( H( K-1, K-1 )-H( K, K ) ) ) + BB = MIN( ABS( H( K, K ) ), + $ ABS( H( K-1, K-1 )-H( K, K ) ) ) + S = AA + AB + IF( BA*( AB / S ).LE.MAX( SMLNUM, + $ ULP*( BB*( AA / S ) ) ) )GO TO 40 + END IF + 30 CONTINUE + 40 CONTINUE + L = K + IF( L.GT.ILO ) THEN +* +* H(L,L-1) is negligible +* + H( L, L-1 ) = ZERO + END IF +* +* Exit from loop if a submatrix of order 1 or 2 has split off. +* + IF( L.GE.I-1 ) + $ GO TO 150 +* +* Now the active submatrix is in rows and columns L to I. If +* eigenvalues only are being computed, only the active submatrix +* need be transformed. +* + IF( .NOT.WANTT ) THEN + I1 = L + I2 = I + END IF +* + IF( ITS.EQ.10 .OR. ITS.EQ.20 ) THEN +* +* Exceptional shift. +* + H11 = DAT1*S + H( I, I ) + H12 = DAT2*S + H21 = S + H22 = H11 + ELSE +* +* Prepare to use Francis' double shift +* (i.e. 
2nd degree generalized Rayleigh quotient) +* + H11 = H( I-1, I-1 ) + H21 = H( I, I-1 ) + H12 = H( I-1, I ) + H22 = H( I, I ) + END IF + S = ABS( H11 ) + ABS( H12 ) + ABS( H21 ) + ABS( H22 ) + IF( S.EQ.ZERO ) THEN + RT1R = ZERO + RT1I = ZERO + RT2R = ZERO + RT2I = ZERO + ELSE + H11 = H11 / S + H21 = H21 / S + H12 = H12 / S + H22 = H22 / S + TR = ( H11+H22 ) / TWO + DET = ( H11-TR )*( H22-TR ) - H12*H21 + RTDISC = SQRT( ABS( DET ) ) + IF( DET.GE.ZERO ) THEN +* +* ==== complex conjugate shifts ==== +* + RT1R = TR*S + RT2R = RT1R + RT1I = RTDISC*S + RT2I = -RT1I + ELSE +* +* ==== real shifts (use only one of them) ==== +* + RT1R = TR + RTDISC + RT2R = TR - RTDISC + IF( ABS( RT1R-H22 ).LE.ABS( RT2R-H22 ) ) THEN + RT1R = RT1R*S + RT2R = RT1R + ELSE + RT2R = RT2R*S + RT1R = RT2R + END IF + RT1I = ZERO + RT2I = ZERO + END IF + END IF +* +* Look for two consecutive small subdiagonal elements. +* + DO 50 M = I - 2, L, -1 +* Determine the effect of starting the double-shift QR +* iteration at row M, and see if this would make H(M,M-1) +* negligible. (The following uses scaling to avoid +* overflows and most underflows.) +* + H21S = H( M+1, M ) + S = ABS( H( M, M )-RT2R ) + ABS( RT2I ) + ABS( H21S ) + H21S = H( M+1, M ) / S + V( 1 ) = H21S*H( M, M+1 ) + ( H( M, M )-RT1R )* + $ ( ( H( M, M )-RT2R ) / S ) - RT1I*( RT2I / S ) + V( 2 ) = H21S*( H( M, M )+H( M+1, M+1 )-RT1R-RT2R ) + V( 3 ) = H21S*H( M+2, M+1 ) + S = ABS( V( 1 ) ) + ABS( V( 2 ) ) + ABS( V( 3 ) ) + V( 1 ) = V( 1 ) / S + V( 2 ) = V( 2 ) / S + V( 3 ) = V( 3 ) / S + IF( M.EQ.L ) + $ GO TO 60 + IF( ABS( H( M, M-1 ) )*( ABS( V( 2 ) )+ABS( V( 3 ) ) ).LE. + $ ULP*ABS( V( 1 ) )*( ABS( H( M-1, M-1 ) )+ABS( H( M, + $ M ) )+ABS( H( M+1, M+1 ) ) ) )GO TO 60 + 50 CONTINUE + 60 CONTINUE +* +* Double-shift QR step +* + DO 130 K = M, I - 1 +* +* The first iteration of this loop determines a reflection G +* from the vector V and applies it from left and right to H, +* thus creating a nonzero bulge below the subdiagonal. 
+* +* Each subsequent iteration determines a reflection G to +* restore the Hessenberg form in the (K-1)th column, and thus +* chases the bulge one step toward the bottom of the active +* submatrix. NR is the order of G. +* + NR = MIN( 3, I-K+1 ) + IF( K.GT.M ) + $ CALL SCOPY( NR, H( K, K-1 ), 1, V, 1 ) + CALL SLARFG( NR, V( 1 ), V( 2 ), 1, T1 ) + IF( K.GT.M ) THEN + H( K, K-1 ) = V( 1 ) + H( K+1, K-1 ) = ZERO + IF( K.LT.I-1 ) + $ H( K+2, K-1 ) = ZERO + ELSE IF( M.GT.L ) THEN + H( K, K-1 ) = -H( K, K-1 ) + END IF + V2 = V( 2 ) + T2 = T1*V2 + IF( NR.EQ.3 ) THEN + V3 = V( 3 ) + T3 = T1*V3 +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. +* + DO 70 J = K, I2 + SUM = H( K, J ) + V2*H( K+1, J ) + V3*H( K+2, J ) + H( K, J ) = H( K, J ) - SUM*T1 + H( K+1, J ) = H( K+1, J ) - SUM*T2 + H( K+2, J ) = H( K+2, J ) - SUM*T3 + 70 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+3,I). +* + DO 80 J = I1, MIN( K+3, I ) + SUM = H( J, K ) + V2*H( J, K+1 ) + V3*H( J, K+2 ) + H( J, K ) = H( J, K ) - SUM*T1 + H( J, K+1 ) = H( J, K+1 ) - SUM*T2 + H( J, K+2 ) = H( J, K+2 ) - SUM*T3 + 80 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 90 J = ILOZ, IHIZ + SUM = Z( J, K ) + V2*Z( J, K+1 ) + V3*Z( J, K+2 ) + Z( J, K ) = Z( J, K ) - SUM*T1 + Z( J, K+1 ) = Z( J, K+1 ) - SUM*T2 + Z( J, K+2 ) = Z( J, K+2 ) - SUM*T3 + 90 CONTINUE + END IF + ELSE IF( NR.EQ.2 ) THEN +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. +* + DO 100 J = K, I2 + SUM = H( K, J ) + V2*H( K+1, J ) + H( K, J ) = H( K, J ) - SUM*T1 + H( K+1, J ) = H( K+1, J ) - SUM*T2 + 100 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+3,I). 
+* + DO 110 J = I1, I + SUM = H( J, K ) + V2*H( J, K+1 ) + H( J, K ) = H( J, K ) - SUM*T1 + H( J, K+1 ) = H( J, K+1 ) - SUM*T2 + 110 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 120 J = ILOZ, IHIZ + SUM = Z( J, K ) + V2*Z( J, K+1 ) + Z( J, K ) = Z( J, K ) - SUM*T1 + Z( J, K+1 ) = Z( J, K+1 ) - SUM*T2 + 120 CONTINUE + END IF + END IF + 130 CONTINUE +* + 140 CONTINUE +* +* Failure to converge in remaining number of iterations +* + INFO = I + RETURN +* + 150 CONTINUE +* + IF( L.EQ.I ) THEN +* +* H(I,I-1) is negligible: one eigenvalue has converged. +* + WR( I ) = H( I, I ) + WI( I ) = ZERO + ELSE IF( L.EQ.I-1 ) THEN +* +* H(I-1,I-2) is negligible: a pair of eigenvalues have converged. +* +* Transform the 2-by-2 submatrix to standard Schur form, +* and compute and store the eigenvalues. +* + CALL SLANV2( H( I-1, I-1 ), H( I-1, I ), H( I, I-1 ), + $ H( I, I ), WR( I-1 ), WI( I-1 ), WR( I ), WI( I ), + $ CS, SN ) +* + IF( WANTT ) THEN +* +* Apply the transformation to the rest of H. +* + IF( I2.GT.I ) + $ CALL SROT( I2-I, H( I-1, I+1 ), LDH, H( I, I+1 ), LDH, + $ CS, SN ) + CALL SROT( I-I1-1, H( I1, I-1 ), 1, H( I1, I ), 1, CS, SN ) + END IF + IF( WANTZ ) THEN +* +* Apply the transformation to Z. +* + CALL SROT( NZ, Z( ILOZ, I-1 ), 1, Z( ILOZ, I ), 1, CS, SN ) + END IF + END IF +* +* return to start of the main loop with new value of I. +* + I = L - 1 + GO TO 20 +* + 160 CONTINUE + RETURN +* +* End of SLAHQR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slahr2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slahr2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,238 @@ + SUBROUTINE SLAHR2( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. 
+ REAL A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* Purpose +* ======= +* +* SLAHR2 reduces the first NB columns of A real general n-BY-(n-k+1) +* matrix A so that elements below the k-th subdiagonal are zero. The +* reduction is performed by an orthogonal similarity transformation +* Q' * A * Q. The routine returns the matrices V and T which determine +* Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. +* +* This is an auxiliary routine called by SGEHRD. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* K (input) INTEGER +* The offset for the reduction. Elements below the k-th +* subdiagonal in the first NB columns are reduced to zero. +* K < N. +* +* NB (input) INTEGER +* The number of columns to be reduced. +* +* A (input/output) REAL array, dimension (LDA,N-K+1) +* On entry, the n-by-(n-k+1) general matrix A. +* On exit, the elements on and above the k-th subdiagonal in +* the first NB columns are overwritten with the corresponding +* elements of the reduced matrix; the elements below the k-th +* subdiagonal, with the array TAU, represent the matrix Q as a +* product of elementary reflectors. The other columns of A are +* unchanged. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) REAL array, dimension (NB) +* The scalar factors of the elementary reflectors. See Further +* Details. +* +* T (output) REAL array, dimension (LDT,NB) +* The upper triangular matrix T. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= NB. +* +* Y (output) REAL array, dimension (LDY,NB) +* The n-by-nb matrix Y. +* +* LDY (input) INTEGER +* The leading dimension of the array Y. LDY >= N. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of nb elementary reflectors +* +* Q = H(1) H(2) . . . H(nb). 
+* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +* A(i+k+1:n,i), and tau in TAU(i). +* +* The elements of the vectors v together form the (n-k+1)-by-nb matrix +* V which is needed, with T and Y, to apply the transformation to the +* unreduced part of the matrix, using an update of the form: +* A := (I - V*T*V') * (A - Y*V'). +* +* The contents of A on exit are illustrated by the following example +* with n = 7, k = 3 and nb = 2: +* +* ( a a a a a ) +* ( a a a a a ) +* ( a a a a a ) +* ( h h a a a ) +* ( v1 h a a a ) +* ( v1 v2 a a a ) +* ( v1 v2 a a a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* This file is a slight modification of LAPACK-3.0's SLAHRD +* incorporating improvements proposed by Quintana-Orti and Van de +* Gejin. Note that the entries of A(1:K,2:NB) differ from those +* returned by the original LAPACK routine. This function is +* not backward compatible with LAPACK3.0. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, + $ ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL EI +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMM, SGEMV, SLACPY, + $ SLARFG, SSCAL, STRMM, STRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. 
+* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(K+1:N,I) +* +* Update I-th column of A - Y * V' +* + CALL SGEMV( 'NO TRANSPOSE', N-K, I-1, -ONE, Y(K+1,1), LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( K+1, I ), 1 ) +* +* Apply I - V * T' * V' to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1' * b1 +* + CALL SCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL STRMV( 'Lower', 'Transpose', 'UNIT', + $ I-1, A( K+1, 1 ), + $ LDA, T( 1, NB ), 1 ) +* +* w := w + V2'*b2 +* + CALL SGEMV( 'Transpose', N-K-I+1, I-1, + $ ONE, A( K+I, 1 ), + $ LDA, A( K+I, I ), 1, ONE, T( 1, NB ), 1 ) +* +* w := T'*w +* + CALL STRMV( 'Upper', 'Transpose', 'NON-UNIT', + $ I-1, T, LDT, + $ T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL SGEMV( 'NO TRANSPOSE', N-K-I+1, I-1, -ONE, + $ A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL STRMV( 'Lower', 'NO TRANSPOSE', + $ 'UNIT', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL SAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(I) to annihilate +* A(K+I+1:N,I) +* + CALL SLARFG( N-K-I+1, A( K+I, I ), A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + EI = A( K+I, I ) + A( K+I, I ) = ONE +* +* Compute Y(K+1:N,I) +* + CALL SGEMV( 'NO TRANSPOSE', N-K, N-K-I+1, + $ ONE, A( K+1, I+1 ), + $ LDA, A( K+I, I ), 1, ZERO, Y( K+1, I ), 1 ) + CALL SGEMV( 'Transpose', N-K-I+1, I-1, + $ ONE, A( K+I, 1 ), LDA, + $ A( K+I, I ), 1, ZERO, T( 1, I ), 1 ) + CALL SGEMV( 'NO TRANSPOSE', N-K, I-1, -ONE, + $ Y( K+1, 1 ), LDY, + $ T( 1, I ), 1, ONE, Y( K+1, I ), 1 ) + CALL SSCAL( N-K, TAU( I ), Y( K+1, I ), 1 ) +* +* Compute T(1:I,I) +* + CALL SSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL STRMV( 'Upper', 'No Transpose', 'NON-UNIT', + $ I-1, T, LDT, + $ T( 
1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* +* Compute Y(1:K,1:NB) +* + CALL SLACPY( 'ALL', K, NB, A( 1, 2 ), LDA, Y, LDY ) + CALL STRMM( 'RIGHT', 'Lower', 'NO TRANSPOSE', + $ 'UNIT', K, NB, + $ ONE, A( K+1, 1 ), LDA, Y, LDY ) + IF( N.GT.K+NB ) + $ CALL SGEMM( 'NO TRANSPOSE', 'NO TRANSPOSE', K, + $ NB, N-K-NB, ONE, + $ A( 1, 2+NB ), LDA, A( K+1+NB, 1 ), LDA, ONE, Y, + $ LDY ) + CALL STRMM( 'RIGHT', 'Upper', 'NO TRANSPOSE', + $ 'NON-UNIT', K, NB, + $ ONE, T, LDT, Y, LDY ) +* + RETURN +* +* End of SLAHR2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slahrd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slahrd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,207 @@ + SUBROUTINE SLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* Purpose +* ======= +* +* SLAHRD reduces the first NB columns of a real general n-by-(n-k+1) +* matrix A so that elements below the k-th subdiagonal are zero. The +* reduction is performed by an orthogonal similarity transformation +* Q' * A * Q. The routine returns the matrices V and T which determine +* Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. +* +* This is an OBSOLETE auxiliary routine. +* This routine will be 'deprecated' in a future release. +* Please use the new routine SLAHR2 instead. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* K (input) INTEGER +* The offset for the reduction. Elements below the k-th +* subdiagonal in the first NB columns are reduced to zero. +* +* NB (input) INTEGER +* The number of columns to be reduced. 
+* +* A (input/output) REAL array, dimension (LDA,N-K+1) +* On entry, the n-by-(n-k+1) general matrix A. +* On exit, the elements on and above the k-th subdiagonal in +* the first NB columns are overwritten with the corresponding +* elements of the reduced matrix; the elements below the k-th +* subdiagonal, with the array TAU, represent the matrix Q as a +* product of elementary reflectors. The other columns of A are +* unchanged. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) REAL array, dimension (NB) +* The scalar factors of the elementary reflectors. See Further +* Details. +* +* T (output) REAL array, dimension (LDT,NB) +* The upper triangular matrix T. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= NB. +* +* Y (output) REAL array, dimension (LDY,NB) +* The n-by-nb matrix Y. +* +* LDY (input) INTEGER +* The leading dimension of the array Y. LDY >= N. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of nb elementary reflectors +* +* Q = H(1) H(2) . . . H(nb). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +* A(i+k+1:n,i), and tau in TAU(i). +* +* The elements of the vectors v together form the (n-k+1)-by-nb matrix +* V which is needed, with T and Y, to apply the transformation to the +* unreduced part of the matrix, using an update of the form: +* A := (I - V*T*V') * (A - Y*V'). +* +* The contents of A on exit are illustrated by the following example +* with n = 7, k = 3 and nb = 2: +* +* ( a h a a a ) +* ( a h a a a ) +* ( a h a a a ) +* ( h h a a a ) +* ( v1 h a a a ) +* ( v1 v2 a a a ) +* ( v1 v2 a a a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). 
+* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL EI +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SLARFG, SSCAL, STRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(1:n,i) +* +* Compute i-th column of A - Y * V' +* + CALL SGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( 1, I ), 1 ) +* +* Apply I - V * T' * V' to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1' * b1 +* + CALL SCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL STRMV( 'Lower', 'Transpose', 'Unit', I-1, A( K+1, 1 ), + $ LDA, T( 1, NB ), 1 ) +* +* w := w + V2'*b2 +* + CALL SGEMV( 'Transpose', N-K-I+1, I-1, ONE, A( K+I, 1 ), + $ LDA, A( K+I, I ), 1, ONE, T( 1, NB ), 1 ) +* +* w := T'*w +* + CALL STRMV( 'Upper', 'Transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL SGEMV( 'No transpose', N-K-I+1, I-1, -ONE, A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL STRMV( 'Lower', 'No transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL SAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(i) to annihilate +* A(k+i+1:n,i) +* + CALL SLARFG( N-K-I+1, A( K+I, I ), A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + EI = A( K+I, I ) + A( K+I, I ) = ONE +* +* Compute Y(1:n,i) +* + CALL SGEMV( 'No transpose', N, N-K-I+1, ONE, A( 1, I+1 ), LDA, + $ A( K+I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'Transpose', N-K-I+1, I-1, ONE, A( K+I, 1 ), LDA, + 
$ A( K+I, I ), 1, ZERO, T( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, T( 1, I ), 1, + $ ONE, Y( 1, I ), 1 ) + CALL SSCAL( N, TAU( I ), Y( 1, I ), 1 ) +* +* Compute T(1:i,i) +* + CALL SSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* + RETURN +* +* End of SLAHRD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaic1.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaic1.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,292 @@ + SUBROUTINE SLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER J, JOB + REAL C, GAMMA, S, SEST, SESTPR +* .. +* .. Array Arguments .. + REAL W( J ), X( J ) +* .. +* +* Purpose +* ======= +* +* SLAIC1 applies one step of incremental condition estimation in +* its simplest version: +* +* Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j +* lower triangular matrix L, such that +* twonorm(L*x) = sest +* Then SLAIC1 computes sestpr, s, c such that +* the vector +* [ s*x ] +* xhat = [ c ] +* is an approximate singular vector of +* [ L 0 ] +* Lhat = [ w' gamma ] +* in the sense that +* twonorm(Lhat*xhat) = sestpr. +* +* Depending on JOB, an estimate for the largest or smallest singular +* value is computed. +* +* Note that [s c]' and sestpr**2 is an eigenpair of the system +* +* diag(sest*sest, 0) + [alpha gamma] * [ alpha ] +* [ gamma ] +* +* where alpha = x'*w. +* +* Arguments +* ========= +* +* JOB (input) INTEGER +* = 1: an estimate for the largest singular value is computed. +* = 2: an estimate for the smallest singular value is computed. +* +* J (input) INTEGER +* Length of X and W +* +* X (input) REAL array, dimension (J) +* The j-vector x. 
+* +* SEST (input) REAL +* Estimated singular value of j by j matrix L +* +* W (input) REAL array, dimension (J) +* The j-vector w. +* +* GAMMA (input) REAL +* The diagonal element gamma. +* +* SESTPR (output) REAL +* Estimated singular value of (j+1) by (j+1) matrix Lhat. +* +* S (output) REAL +* Sine needed in forming xhat. +* +* C (output) REAL +* Cosine needed in forming xhat. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) + REAL HALF, FOUR + PARAMETER ( HALF = 0.5E0, FOUR = 4.0E0 ) +* .. +* .. Local Scalars .. + REAL ABSALP, ABSEST, ABSGAM, ALPHA, B, COSINE, EPS, + $ NORMA, S1, S2, SINE, T, TEST, TMP, ZETA1, ZETA2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. External Functions .. + REAL SDOT, SLAMCH + EXTERNAL SDOT, SLAMCH +* .. +* .. Executable Statements .. +* + EPS = SLAMCH( 'Epsilon' ) + ALPHA = SDOT( J, X, 1, W, 1 ) +* + ABSALP = ABS( ALPHA ) + ABSGAM = ABS( GAMMA ) + ABSEST = ABS( SEST ) +* + IF( JOB.EQ.1 ) THEN +* +* Estimating largest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + S1 = MAX( ABSGAM, ABSALP ) + IF( S1.EQ.ZERO ) THEN + S = ZERO + C = ONE + SESTPR = ZERO + ELSE + S = ALPHA / S1 + C = GAMMA / S1 + TMP = SQRT( S*S+C*C ) + S = S / TMP + C = C / TMP + SESTPR = S1*TMP + END IF + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ONE + C = ZERO + TMP = MAX( ABSEST, ABSALP ) + S1 = ABSEST / TMP + S2 = ABSALP / TMP + SESTPR = TMP*SQRT( S1*S1+S2*S2 ) + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ONE + C = ZERO + SESTPR = S2 + ELSE + S = ZERO + C = ONE + SESTPR = S1 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. 
ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + S = SQRT( ONE+TMP*TMP ) + SESTPR = S2*S + C = ( GAMMA / S2 ) / S + S = SIGN( ONE, ALPHA ) / S + ELSE + TMP = S2 / S1 + C = SQRT( ONE+TMP*TMP ) + SESTPR = S1*C + S = ( ALPHA / S1 ) / C + C = SIGN( ONE, GAMMA ) / C + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ALPHA / ABSEST + ZETA2 = GAMMA / ABSEST +* + B = ( ONE-ZETA1*ZETA1-ZETA2*ZETA2 )*HALF + C = ZETA1*ZETA1 + IF( B.GT.ZERO ) THEN + T = C / ( B+SQRT( B*B+C ) ) + ELSE + T = SQRT( B*B+C ) - B + END IF +* + SINE = -ZETA1 / T + COSINE = -ZETA2 / ( ONE+T ) + TMP = SQRT( SINE*SINE+COSINE*COSINE ) + S = SINE / TMP + C = COSINE / TMP + SESTPR = SQRT( T+ONE )*ABSEST + RETURN + END IF +* + ELSE IF( JOB.EQ.2 ) THEN +* +* Estimating smallest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + SESTPR = ZERO + IF( MAX( ABSGAM, ABSALP ).EQ.ZERO ) THEN + SINE = ONE + COSINE = ZERO + ELSE + SINE = -GAMMA + COSINE = ALPHA + END IF + S1 = MAX( ABS( SINE ), ABS( COSINE ) ) + S = SINE / S1 + C = COSINE / S1 + TMP = SQRT( S*S+C*C ) + S = S / TMP + C = C / TMP + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ZERO + C = ONE + SESTPR = ABSGAM + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ZERO + C = ONE + SESTPR = S1 + ELSE + S = ONE + C = ZERO + SESTPR = S2 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. 
ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + C = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST*( TMP / C ) + S = -( GAMMA / S2 ) / C + C = SIGN( ONE, ALPHA ) / C + ELSE + TMP = S2 / S1 + S = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST / S + C = ( ALPHA / S1 ) / S + S = -SIGN( ONE, GAMMA ) / S + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ALPHA / ABSEST + ZETA2 = GAMMA / ABSEST +* + NORMA = MAX( ONE+ZETA1*ZETA1+ABS( ZETA1*ZETA2 ), + $ ABS( ZETA1*ZETA2 )+ZETA2*ZETA2 ) +* +* See if root is closer to zero or to ONE +* + TEST = ONE + TWO*( ZETA1-ZETA2 )*( ZETA1+ZETA2 ) + IF( TEST.GE.ZERO ) THEN +* +* root is close to zero, compute directly +* + B = ( ZETA1*ZETA1+ZETA2*ZETA2+ONE )*HALF + C = ZETA2*ZETA2 + T = C / ( B+SQRT( ABS( B*B-C ) ) ) + SINE = ZETA1 / ( ONE-T ) + COSINE = -ZETA2 / T + SESTPR = SQRT( T+FOUR*EPS*EPS*NORMA )*ABSEST + ELSE +* +* root is closer to ONE, shift by that amount +* + B = ( ZETA2*ZETA2+ZETA1*ZETA1-ONE )*HALF + C = ZETA1*ZETA1 + IF( B.GE.ZERO ) THEN + T = -C / ( B+SQRT( B*B+C ) ) + ELSE + T = B - SQRT( B*B+C ) + END IF + SINE = -ZETA1 / T + COSINE = -ZETA2 / ( ONE+T ) + SESTPR = SQRT( ONE+T+FOUR*EPS*EPS*NORMA )*ABSEST + END IF + TMP = SQRT( SINE*SINE+COSINE*COSINE ) + S = SINE / TMP + C = COSINE / TMP + RETURN +* + END IF + END IF + RETURN +* +* End of SLAIC1 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaln2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaln2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,507 @@ + SUBROUTINE SLALN2( LTRANS, NA, NW, SMIN, CA, A, LDA, D1, D2, B, + $ LDB, WR, WI, X, LDX, SCALE, XNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + LOGICAL LTRANS + INTEGER INFO, LDA, LDB, LDX, NA, NW + REAL CA, D1, D2, SCALE, SMIN, WI, WR, XNORM +* .. +* .. Array Arguments .. 
+ REAL A( LDA, * ), B( LDB, * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SLALN2 solves a system of the form (ca A - w D ) X = s B +* or (ca A' - w D) X = s B with possible scaling ("s") and +* perturbation of A. (A' means A-transpose.) +* +* A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA +* real diagonal matrix, w is a real or complex value, and X and B are +* NA x 1 matrices -- real if w is real, complex if w is complex. NA +* may be 1 or 2. +* +* If w is complex, X and B are represented as NA x 2 matrices, +* the first column of each being the real part and the second +* being the imaginary part. +* +* "s" is a scaling factor (.LE. 1), computed by SLALN2, which is +* so chosen that X can be computed without overflow. X is further +* scaled if necessary to assure that norm(ca A - w D)*norm(X) is less +* than overflow. +* +* If both singular values of (ca A - w D) are less than SMIN, +* SMIN*identity will be used instead of (ca A - w D). If only one +* singular value is less than SMIN, one element of (ca A - w D) will be +* perturbed enough to make the smallest singular value roughly SMIN. +* If both singular values are at least SMIN, (ca A - w D) will not be +* perturbed. In any case, the perturbation will be at most some small +* multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values +* are computed by infinity-norm approximations, and thus will only be +* correct to a factor of 2 or so. +* +* Note: all input quantities are assumed to be smaller than overflow +* by a reasonable factor. (See BIGNUM.) +* +* Arguments +* ========== +* +* LTRANS (input) LOGICAL +* =.TRUE.: A-transpose will be used. +* =.FALSE.: A will be used (not transposed.) +* +* NA (input) INTEGER +* The size of the matrix A. It may (only) be 1 or 2. +* +* NW (input) INTEGER +* 1 if "w" is real, 2 if "w" is complex. It may only be 1 +* or 2. +* +* SMIN (input) REAL +* The desired lower bound on the singular values of A. 
This +* should be a safe distance away from underflow or overflow, +* say, between (underflow/machine precision) and (machine +* precision * overflow ). (See BIGNUM and ULP.) +* +* CA (input) REAL +* The coefficient c, which A is multiplied by. +* +* A (input) REAL array, dimension (LDA,NA) +* The NA x NA matrix A. +* +* LDA (input) INTEGER +* The leading dimension of A. It must be at least NA. +* +* D1 (input) REAL +* The 1,1 element in the diagonal matrix D. +* +* D2 (input) REAL +* The 2,2 element in the diagonal matrix D. Not used if NW=1. +* +* B (input) REAL array, dimension (LDB,NW) +* The NA x NW matrix B (right-hand side). If NW=2 ("w" is +* complex), column 1 contains the real part of B and column 2 +* contains the imaginary part. +* +* LDB (input) INTEGER +* The leading dimension of B. It must be at least NA. +* +* WR (input) REAL +* The real part of the scalar "w". +* +* WI (input) REAL +* The imaginary part of the scalar "w". Not used if NW=1. +* +* X (output) REAL array, dimension (LDX,NW) +* The NA x NW matrix X (unknowns), as computed by SLALN2. +* If NW=2 ("w" is complex), on exit, column 1 will contain +* the real part of X and column 2 will contain the imaginary +* part. +* +* LDX (input) INTEGER +* The leading dimension of X. It must be at least NA. +* +* SCALE (output) REAL +* The scale factor that B must be multiplied by to insure +* that overflow does not occur when computing X. Thus, +* (ca A - w D) X will be SCALE*B, not B (ignoring +* perturbations of A.) It will be at most 1. +* +* XNORM (output) REAL +* The infinity-norm of X, when X is regarded as an NA x NW +* real matrix. +* +* INFO (output) INTEGER +* An error flag. It will be set to zero if no error occurs, +* a negative number if an argument is in error, or a positive +* number if ca A - w D had to be perturbed. +* The possible values are: +* = 0: No error occurred, and (ca A - w D) did not have to be +* perturbed. 
+* = 1: (ca A - w D) had to be perturbed to make its smallest +* (or only) singular value greater than SMIN. +* NOTE: In the interests of speed, this routine does not +* check the inputs for errors. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + INTEGER ICMAX, J + REAL BBND, BI1, BI2, BIGNUM, BNORM, BR1, BR2, CI21, + $ CI22, CMAX, CNORM, CR21, CR22, CSI, CSR, LI21, + $ LR21, SMINI, SMLNUM, TEMP, U22ABS, UI11, UI11R, + $ UI12, UI12S, UI22, UR11, UR11R, UR12, UR12S, + $ UR22, XI1, XI2, XR1, XR2 +* .. +* .. Local Arrays .. + LOGICAL CSWAP( 4 ), RSWAP( 4 ) + INTEGER IPIVOT( 4, 4 ) + REAL CI( 2, 2 ), CIV( 4 ), CR( 2, 2 ), CRV( 4 ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Equivalences .. + EQUIVALENCE ( CI( 1, 1 ), CIV( 1 ) ), + $ ( CR( 1, 1 ), CRV( 1 ) ) +* .. +* .. Data statements .. + DATA CSWAP / .FALSE., .FALSE., .TRUE., .TRUE. / + DATA RSWAP / .FALSE., .TRUE., .FALSE., .TRUE. / + DATA IPIVOT / 1, 2, 3, 4, 2, 1, 4, 3, 3, 4, 1, 2, 4, + $ 3, 2, 1 / +* .. +* .. Executable Statements .. +* +* Compute BIGNUM +* + SMLNUM = TWO*SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + SMINI = MAX( SMIN, SMLNUM ) +* +* Don't check for input errors +* + INFO = 0 +* +* Standard Initializations +* + SCALE = ONE +* + IF( NA.EQ.1 ) THEN +* +* 1 x 1 (i.e., scalar) system C X = B +* + IF( NW.EQ.1 ) THEN +* +* Real 1x1 system. +* +* C = ca A - w D +* + CSR = CA*A( 1, 1 ) - WR*D1 + CNORM = ABS( CSR ) +* +* If | C | < SMINI, use C = SMINI +* + IF( CNORM.LT.SMINI ) THEN + CSR = SMINI + CNORM = SMINI + INFO = 1 + END IF +* +* Check scaling for X = B / C +* + BNORM = ABS( B( 1, 1 ) ) + IF( CNORM.LT.ONE .AND. 
BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*CNORM ) + $ SCALE = ONE / BNORM + END IF +* +* Compute X +* + X( 1, 1 ) = ( B( 1, 1 )*SCALE ) / CSR + XNORM = ABS( X( 1, 1 ) ) + ELSE +* +* Complex 1x1 system (w is complex) +* +* C = ca A - w D +* + CSR = CA*A( 1, 1 ) - WR*D1 + CSI = -WI*D1 + CNORM = ABS( CSR ) + ABS( CSI ) +* +* If | C | < SMINI, use C = SMINI +* + IF( CNORM.LT.SMINI ) THEN + CSR = SMINI + CSI = ZERO + CNORM = SMINI + INFO = 1 + END IF +* +* Check scaling for X = B / C +* + BNORM = ABS( B( 1, 1 ) ) + ABS( B( 1, 2 ) ) + IF( CNORM.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*CNORM ) + $ SCALE = ONE / BNORM + END IF +* +* Compute X +* + CALL SLADIV( SCALE*B( 1, 1 ), SCALE*B( 1, 2 ), CSR, CSI, + $ X( 1, 1 ), X( 1, 2 ) ) + XNORM = ABS( X( 1, 1 ) ) + ABS( X( 1, 2 ) ) + END IF +* + ELSE +* +* 2x2 System +* +* Compute the real part of C = ca A - w D (or ca A' - w D ) +* + CR( 1, 1 ) = CA*A( 1, 1 ) - WR*D1 + CR( 2, 2 ) = CA*A( 2, 2 ) - WR*D2 + IF( LTRANS ) THEN + CR( 1, 2 ) = CA*A( 2, 1 ) + CR( 2, 1 ) = CA*A( 1, 2 ) + ELSE + CR( 2, 1 ) = CA*A( 2, 1 ) + CR( 1, 2 ) = CA*A( 1, 2 ) + END IF +* + IF( NW.EQ.1 ) THEN +* +* Real 2x2 system (w is real) +* +* Find the largest element in C +* + CMAX = ZERO + ICMAX = 0 +* + DO 10 J = 1, 4 + IF( ABS( CRV( J ) ).GT.CMAX ) THEN + CMAX = ABS( CRV( J ) ) + ICMAX = J + END IF + 10 CONTINUE +* +* If norm(C) < SMINI, use SMINI*identity. +* + IF( CMAX.LT.SMINI ) THEN + BNORM = MAX( ABS( B( 1, 1 ) ), ABS( B( 2, 1 ) ) ) + IF( SMINI.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*SMINI ) + $ SCALE = ONE / BNORM + END IF + TEMP = SCALE / SMINI + X( 1, 1 ) = TEMP*B( 1, 1 ) + X( 2, 1 ) = TEMP*B( 2, 1 ) + XNORM = TEMP*BNORM + INFO = 1 + RETURN + END IF +* +* Gaussian elimination with complete pivoting. 
+* + UR11 = CRV( ICMAX ) + CR21 = CRV( IPIVOT( 2, ICMAX ) ) + UR12 = CRV( IPIVOT( 3, ICMAX ) ) + CR22 = CRV( IPIVOT( 4, ICMAX ) ) + UR11R = ONE / UR11 + LR21 = UR11R*CR21 + UR22 = CR22 - UR12*LR21 +* +* If smaller pivot < SMINI, use SMINI +* + IF( ABS( UR22 ).LT.SMINI ) THEN + UR22 = SMINI + INFO = 1 + END IF + IF( RSWAP( ICMAX ) ) THEN + BR1 = B( 2, 1 ) + BR2 = B( 1, 1 ) + ELSE + BR1 = B( 1, 1 ) + BR2 = B( 2, 1 ) + END IF + BR2 = BR2 - LR21*BR1 + BBND = MAX( ABS( BR1*( UR22*UR11R ) ), ABS( BR2 ) ) + IF( BBND.GT.ONE .AND. ABS( UR22 ).LT.ONE ) THEN + IF( BBND.GE.BIGNUM*ABS( UR22 ) ) + $ SCALE = ONE / BBND + END IF +* + XR2 = ( BR2*SCALE ) / UR22 + XR1 = ( SCALE*BR1 )*UR11R - XR2*( UR11R*UR12 ) + IF( CSWAP( ICMAX ) ) THEN + X( 1, 1 ) = XR2 + X( 2, 1 ) = XR1 + ELSE + X( 1, 1 ) = XR1 + X( 2, 1 ) = XR2 + END IF + XNORM = MAX( ABS( XR1 ), ABS( XR2 ) ) +* +* Further scaling if norm(A) norm(X) > overflow +* + IF( XNORM.GT.ONE .AND. CMAX.GT.ONE ) THEN + IF( XNORM.GT.BIGNUM / CMAX ) THEN + TEMP = CMAX / BIGNUM + X( 1, 1 ) = TEMP*X( 1, 1 ) + X( 2, 1 ) = TEMP*X( 2, 1 ) + XNORM = TEMP*XNORM + SCALE = TEMP*SCALE + END IF + END IF + ELSE +* +* Complex 2x2 system (w is complex) +* +* Find the largest element in C +* + CI( 1, 1 ) = -WI*D1 + CI( 2, 1 ) = ZERO + CI( 1, 2 ) = ZERO + CI( 2, 2 ) = -WI*D2 + CMAX = ZERO + ICMAX = 0 +* + DO 20 J = 1, 4 + IF( ABS( CRV( J ) )+ABS( CIV( J ) ).GT.CMAX ) THEN + CMAX = ABS( CRV( J ) ) + ABS( CIV( J ) ) + ICMAX = J + END IF + 20 CONTINUE +* +* If norm(C) < SMINI, use SMINI*identity. +* + IF( CMAX.LT.SMINI ) THEN + BNORM = MAX( ABS( B( 1, 1 ) )+ABS( B( 1, 2 ) ), + $ ABS( B( 2, 1 ) )+ABS( B( 2, 2 ) ) ) + IF( SMINI.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*SMINI ) + $ SCALE = ONE / BNORM + END IF + TEMP = SCALE / SMINI + X( 1, 1 ) = TEMP*B( 1, 1 ) + X( 2, 1 ) = TEMP*B( 2, 1 ) + X( 1, 2 ) = TEMP*B( 1, 2 ) + X( 2, 2 ) = TEMP*B( 2, 2 ) + XNORM = TEMP*BNORM + INFO = 1 + RETURN + END IF +* +* Gaussian elimination with complete pivoting. 
+* + UR11 = CRV( ICMAX ) + UI11 = CIV( ICMAX ) + CR21 = CRV( IPIVOT( 2, ICMAX ) ) + CI21 = CIV( IPIVOT( 2, ICMAX ) ) + UR12 = CRV( IPIVOT( 3, ICMAX ) ) + UI12 = CIV( IPIVOT( 3, ICMAX ) ) + CR22 = CRV( IPIVOT( 4, ICMAX ) ) + CI22 = CIV( IPIVOT( 4, ICMAX ) ) + IF( ICMAX.EQ.1 .OR. ICMAX.EQ.4 ) THEN +* +* Code when off-diagonals of pivoted C are real +* + IF( ABS( UR11 ).GT.ABS( UI11 ) ) THEN + TEMP = UI11 / UR11 + UR11R = ONE / ( UR11*( ONE+TEMP**2 ) ) + UI11R = -TEMP*UR11R + ELSE + TEMP = UR11 / UI11 + UI11R = -ONE / ( UI11*( ONE+TEMP**2 ) ) + UR11R = -TEMP*UI11R + END IF + LR21 = CR21*UR11R + LI21 = CR21*UI11R + UR12S = UR12*UR11R + UI12S = UR12*UI11R + UR22 = CR22 - UR12*LR21 + UI22 = CI22 - UR12*LI21 + ELSE +* +* Code when diagonals of pivoted C are real +* + UR11R = ONE / UR11 + UI11R = ZERO + LR21 = CR21*UR11R + LI21 = CI21*UR11R + UR12S = UR12*UR11R + UI12S = UI12*UR11R + UR22 = CR22 - UR12*LR21 + UI12*LI21 + UI22 = -UR12*LI21 - UI12*LR21 + END IF + U22ABS = ABS( UR22 ) + ABS( UI22 ) +* +* If smaller pivot < SMINI, use SMINI +* + IF( U22ABS.LT.SMINI ) THEN + UR22 = SMINI + UI22 = ZERO + INFO = 1 + END IF + IF( RSWAP( ICMAX ) ) THEN + BR2 = B( 1, 1 ) + BR1 = B( 2, 1 ) + BI2 = B( 1, 2 ) + BI1 = B( 2, 2 ) + ELSE + BR1 = B( 1, 1 ) + BR2 = B( 2, 1 ) + BI1 = B( 1, 2 ) + BI2 = B( 2, 2 ) + END IF + BR2 = BR2 - LR21*BR1 + LI21*BI1 + BI2 = BI2 - LI21*BR1 - LR21*BI1 + BBND = MAX( ( ABS( BR1 )+ABS( BI1 ) )* + $ ( U22ABS*( ABS( UR11R )+ABS( UI11R ) ) ), + $ ABS( BR2 )+ABS( BI2 ) ) + IF( BBND.GT.ONE .AND. 
U22ABS.LT.ONE ) THEN + IF( BBND.GE.BIGNUM*U22ABS ) THEN + SCALE = ONE / BBND + BR1 = SCALE*BR1 + BI1 = SCALE*BI1 + BR2 = SCALE*BR2 + BI2 = SCALE*BI2 + END IF + END IF +* + CALL SLADIV( BR2, BI2, UR22, UI22, XR2, XI2 ) + XR1 = UR11R*BR1 - UI11R*BI1 - UR12S*XR2 + UI12S*XI2 + XI1 = UI11R*BR1 + UR11R*BI1 - UI12S*XR2 - UR12S*XI2 + IF( CSWAP( ICMAX ) ) THEN + X( 1, 1 ) = XR2 + X( 2, 1 ) = XR1 + X( 1, 2 ) = XI2 + X( 2, 2 ) = XI1 + ELSE + X( 1, 1 ) = XR1 + X( 2, 1 ) = XR2 + X( 1, 2 ) = XI1 + X( 2, 2 ) = XI2 + END IF + XNORM = MAX( ABS( XR1 )+ABS( XI1 ), ABS( XR2 )+ABS( XI2 ) ) +* +* Further scaling if norm(A) norm(X) > overflow +* + IF( XNORM.GT.ONE .AND. CMAX.GT.ONE ) THEN + IF( XNORM.GT.BIGNUM / CMAX ) THEN + TEMP = CMAX / BIGNUM + X( 1, 1 ) = TEMP*X( 1, 1 ) + X( 2, 1 ) = TEMP*X( 2, 1 ) + X( 1, 2 ) = TEMP*X( 1, 2 ) + X( 2, 2 ) = TEMP*X( 2, 2 ) + XNORM = TEMP*XNORM + SCALE = TEMP*SCALE + END IF + END IF + END IF + END IF +* + RETURN +* +* End of SLALN2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slals0.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slals0.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,377 @@ + SUBROUTINE SLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ POLES, DIFL, DIFR, Z, K, C, S, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, + $ LDGNUM, NL, NR, NRHS, SQRE + REAL C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), PERM( * ) + REAL B( LDB, * ), BX( LDBX, * ), DIFL( * ), + $ DIFR( LDGNUM, * ), GIVNUM( LDGNUM, * ), + $ POLES( LDGNUM, * ), WORK( * ), Z( * ) +* .. 
+* +* Purpose +* ======= +* +* SLALS0 applies back the multiplying factors of either the left or the +* right singular vector matrix of a diagonal matrix appended by a row +* to the right hand side matrix B in solving the least squares problem +* using the divide-and-conquer SVD approach. +* +* For the left singular vector matrix, three types of orthogonal +* matrices are involved: +* +* (1L) Givens rotations: the number of such rotations is GIVPTR; the +* pairs of columns/rows they were applied to are stored in GIVCOL; +* and the C- and S-values of these rotations are stored in GIVNUM. +* +* (2L) Permutation. The (NL+1)-st row of B is to be moved to the first +* row, and for J=2:N, PERM(J)-th row of B is to be moved to the +* J-th row. +* +* (3L) The left singular vector matrix of the remaining matrix. +* +* For the right singular vector matrix, four types of orthogonal +* matrices are involved: +* +* (1R) The right singular vector matrix of the remaining matrix. +* +* (2R) If SQRE = 1, one extra Givens rotation to generate the right +* null space. +* +* (3R) The inverse transformation of (2L). +* +* (4R) The inverse transformation of (1L). +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed in +* factored form: +* = 0: Left singular vector matrix. +* = 1: Right singular vector matrix. +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has row dimension N = NL + NR + 1, +* and column dimension M = N + SQRE. +* +* NRHS (input) INTEGER +* The number of columns of B and BX. NRHS must be at least 1. 
+* +* B (input/output) REAL array, dimension ( LDB, NRHS ) +* On input, B contains the right hand sides of the least +* squares problem in rows 1 through M. On output, B contains +* the solution X in rows 1 through N. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB must be at least +* max(1,MAX( M, N ) ). +* +* BX (workspace) REAL array, dimension ( LDBX, NRHS ) +* +* LDBX (input) INTEGER +* The leading dimension of BX. +* +* PERM (input) INTEGER array, dimension ( N ) +* The permutations (from deflation and sorting) applied +* to the two blocks. +* +* GIVPTR (input) INTEGER +* The number of Givens rotations which took place in this +* subproblem. +* +* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) +* Each pair of numbers indicates a pair of rows/columns +* involved in a Givens rotation. +* +* LDGCOL (input) INTEGER +* The leading dimension of GIVCOL, must be at least N. +* +* GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) +* Each number indicates the C or S value used in the +* corresponding Givens rotation. +* +* LDGNUM (input) INTEGER +* The leading dimension of arrays DIFR, POLES and +* GIVNUM, must be at least K. +* +* POLES (input) REAL array, dimension ( LDGNUM, 2 ) +* On entry, POLES(1:K, 1) contains the new singular +* values obtained from solving the secular equation, and +* POLES(1:K, 2) is an array containing the poles in the secular +* equation. +* +* DIFL (input) REAL array, dimension ( K ). +* On entry, DIFL(I) is the distance between I-th updated +* (undeflated) singular value and the I-th (undeflated) old +* singular value. +* +* DIFR (input) REAL array, dimension ( LDGNUM, 2 ). +* On entry, DIFR(I, 1) contains the distances between I-th +* updated (undeflated) singular value and the I+1-th +* (undeflated) old singular value. And DIFR(I, 2) is the +* normalizing factor for the I-th right singular vector. +* +* Z (input) REAL array, dimension ( K ) +* Contain the components of the deflation-adjusted updating row +* vector. 
+* +* K (input) INTEGER +* Contains the dimension of the non-deflated matrix, +* This is the order of the related secular equation. 1 <= K <=N. +* +* C (input) REAL +* C contains garbage if SQRE =0 and the C-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* S (input) REAL +* S contains garbage if SQRE =0 and the S-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* WORK (workspace) REAL array, dimension ( K ) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0, NEGONE = -1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, M, N, NLP1 + REAL DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMV, SLACPY, SLASCL, SROT, SSCAL, + $ XERBLA +* .. +* .. External Functions .. + REAL SLAMC3, SNRM2 + EXTERNAL SLAMC3, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. 
( SQRE.GT.1 ) ) THEN + INFO = -4 + END IF +* + N = NL + NR + 1 +* + IF( NRHS.LT.1 ) THEN + INFO = -5 + ELSE IF( LDB.LT.N ) THEN + INFO = -7 + ELSE IF( LDBX.LT.N ) THEN + INFO = -9 + ELSE IF( GIVPTR.LT.0 ) THEN + INFO = -11 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -13 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -15 + ELSE IF( K.LT.1 ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLALS0', -INFO ) + RETURN + END IF +* + M = N + SQRE + NLP1 = NL + 1 +* + IF( ICOMPQ.EQ.0 ) THEN +* +* Apply back orthogonal transformations from the left. +* +* Step (1L): apply back the Givens rotations performed. +* + DO 10 I = 1, GIVPTR + CALL SROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ GIVNUM( I, 1 ) ) + 10 CONTINUE +* +* Step (2L): permute rows of B. +* + CALL SCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX ) + DO 20 I = 2, N + CALL SCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX ) + 20 CONTINUE +* +* Step (3L): apply the inverse of the left singular vector +* matrix to BX. +* + IF( K.EQ.1 ) THEN + CALL SCOPY( NRHS, BX, LDBX, B, LDB ) + IF( Z( 1 ).LT.ZERO ) THEN + CALL SSCAL( NRHS, NEGONE, B, LDB ) + END IF + ELSE + DO 50 J = 1, K + DIFLJ = DIFL( J ) + DJ = POLES( J, 1 ) + DSIGJ = -POLES( J, 2 ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -POLES( J+1, 2 ) + END IF + IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) ) + $ THEN + WORK( J ) = ZERO + ELSE + WORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ / + $ ( POLES( J, 2 )+DJ ) + END IF + DO 30 I = 1, J - 1 + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( SLAMC3( POLES( I, 2 ), DSIGJ )- + $ DIFLJ ) / ( POLES( I, 2 )+DJ ) + END IF + 30 CONTINUE + DO 40 I = J + 1, K + IF( ( Z( I ).EQ.ZERO ) .OR. 
+ $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( SLAMC3( POLES( I, 2 ), DSIGJP )+ + $ DIFRJ ) / ( POLES( I, 2 )+DJ ) + END IF + 40 CONTINUE + WORK( 1 ) = NEGONE + TEMP = SNRM2( K, WORK, 1 ) + CALL SGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, + $ B( J, 1 ), LDB ) + CALL SLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ), + $ LDB, INFO ) + 50 CONTINUE + END IF +* +* Move the deflated rows of BX to B also. +* + IF( K.LT.MAX( M, N ) ) + $ CALL SLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX, + $ B( K+1, 1 ), LDB ) + ELSE +* +* Apply back the right orthogonal transformations. +* +* Step (1R): apply back the new right singular vector matrix +* to B. +* + IF( K.EQ.1 ) THEN + CALL SCOPY( NRHS, B, LDB, BX, LDBX ) + ELSE + DO 80 J = 1, K + DSIGJ = POLES( J, 2 ) + IF( Z( J ).EQ.ZERO ) THEN + WORK( J ) = ZERO + ELSE + WORK( J ) = -Z( J ) / DIFL( J ) / + $ ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 ) + END IF + DO 60 I = 1, J - 1 + IF( Z( J ).EQ.ZERO ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I+1, + $ 2 ) )-DIFR( I, 1 ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 60 CONTINUE + DO 70 I = J + 1, K + IF( Z( J ).EQ.ZERO ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I, + $ 2 ) )-DIFL( I ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 70 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, + $ BX( J, 1 ), LDBX ) + 80 CONTINUE + END IF +* +* Step (2R): if SQRE = 1, apply back the rotation that is +* related to the right null space of the subproblem. +* + IF( SQRE.EQ.1 ) THEN + CALL SCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX ) + CALL SROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S ) + END IF + IF( K.LT.MAX( M, N ) ) + $ CALL SLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, BX( K+1, 1 ), + $ LDBX ) +* +* Step (3R): permute rows of B. 
+* + CALL SCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB ) + IF( SQRE.EQ.1 ) THEN + CALL SCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB ) + END IF + DO 90 I = 2, N + CALL SCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB ) + 90 CONTINUE +* +* Step (4R): apply back the Givens rotations performed. +* + DO 100 I = GIVPTR, 1, -1 + CALL SROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ -GIVNUM( I, 1 ) ) + 100 CONTINUE + END IF +* + RETURN +* +* End of SLALS0 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slalsa.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slalsa.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,362 @@ + SUBROUTINE SLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U, + $ LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR, + $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, + $ SMLSIZ +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), + $ K( * ), PERM( LDGCOL, * ) + REAL B( LDB, * ), BX( LDBX, * ), C( * ), + $ DIFL( LDU, * ), DIFR( LDU, * ), + $ GIVNUM( LDU, * ), POLES( LDU, * ), S( * ), + $ U( LDU, * ), VT( LDU, * ), WORK( * ), + $ Z( LDU, * ) +* .. +* +* Purpose +* ======= +* +* SLALSA is an itermediate step in solving the least squares problem +* by computing the SVD of the coefficient matrix in compact form (The +* singular vectors are computed as products of simple orthorgonal +* matrices.). +* +* If ICOMPQ = 0, SLALSA applies the inverse of the left singular vector +* matrix of an upper bidiagonal matrix to the right hand side; and if +* ICOMPQ = 1, SLALSA applies the right singular vector matrix to the +* right hand side. The singular vector matrices were generated in +* compact form by SLALSA. 
+* +* Arguments +* ========= +* +* +* ICOMPQ (input) INTEGER +* Specifies whether the left or the right singular vector +* matrix is involved. +* = 0: Left singular vector matrix +* = 1: Right singular vector matrix +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The row and column dimensions of the upper bidiagonal matrix. +* +* NRHS (input) INTEGER +* The number of columns of B and BX. NRHS must be at least 1. +* +* B (input/output) REAL array, dimension ( LDB, NRHS ) +* On input, B contains the right hand sides of the least +* squares problem in rows 1 through M. +* On output, B contains the solution X in rows 1 through N. +* +* LDB (input) INTEGER +* The leading dimension of B in the calling subprogram. +* LDB must be at least max(1,MAX( M, N ) ). +* +* BX (output) REAL array, dimension ( LDBX, NRHS ) +* On exit, the result of applying the left or right singular +* vector matrix to B. +* +* LDBX (input) INTEGER +* The leading dimension of BX. +* +* U (input) REAL array, dimension ( LDU, SMLSIZ ). +* On entry, U contains the left singular vector matrices of all +* subproblems at the bottom level. +* +* LDU (input) INTEGER, LDU = > N. +* The leading dimension of arrays U, VT, DIFL, DIFR, +* POLES, GIVNUM, and Z. +* +* VT (input) REAL array, dimension ( LDU, SMLSIZ+1 ). +* On entry, VT' contains the right singular vector matrices of +* all subproblems at the bottom level. +* +* K (input) INTEGER array, dimension ( N ). +* +* DIFL (input) REAL array, dimension ( LDU, NLVL ). +* where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. +* +* DIFR (input) REAL array, dimension ( LDU, 2 * NLVL ). +* On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record +* distances between singular values on the I-th level and +* singular values on the (I -1)-th level, and DIFR(*, 2 * I) +* record the normalizing factors of the right singular vectors +* matrices of subproblems on I-th level. 
+* +* Z (input) REAL array, dimension ( LDU, NLVL ). +* On entry, Z(1, I) contains the components of the deflation- +* adjusted updating row vector for subproblems on the I-th +* level. +* +* POLES (input) REAL array, dimension ( LDU, 2 * NLVL ). +* On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old +* singular values involved in the secular equations on the I-th +* level. +* +* GIVPTR (input) INTEGER array, dimension ( N ). +* On entry, GIVPTR( I ) records the number of Givens +* rotations performed on the I-th problem on the computation +* tree. +* +* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). +* On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the +* locations of Givens rotations performed on the I-th level on +* the computation tree. +* +* LDGCOL (input) INTEGER, LDGCOL = > N. +* The leading dimension of arrays GIVCOL and PERM. +* +* PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). +* On entry, PERM(*, I) records permutations done on the I-th +* level of the computation tree. +* +* GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ). +* On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- +* values of Givens rotations performed on the I-th level on the +* computation tree. +* +* C (input) REAL array, dimension ( N ). +* On entry, if the I-th subproblem is not square, +* C( I ) contains the C-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* S (input) REAL array, dimension ( N ). +* On entry, if the I-th subproblem is not square, +* S( I ) contains the S-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* WORK (workspace) REAL array. +* The dimension must be at least N. +* +* IWORK (workspace) INTEGER array. +* The dimension must be at least 3 * N +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. 
+* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IM1, INODE, J, LF, LL, LVL, LVL2, + $ ND, NDB1, NDIML, NDIMR, NL, NLF, NLP1, NLVL, + $ NR, NRF, NRP1, SQRE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SLALS0, SLASDT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.SMLSIZ ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( LDB.LT.N ) THEN + INFO = -6 + ELSE IF( LDBX.LT.N ) THEN + INFO = -8 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLALSA', -INFO ) + RETURN + END IF +* +* Book-keeping and setting up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N +* + CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* The following code applies back the left singular vector factors. +* For applying back the right singular vector factors, go to 50. +* + IF( ICOMPQ.EQ.1 ) THEN + GO TO 50 + END IF +* +* The nodes on the bottom level of the tree were solved +* by SLASDQ. The corresponding left and right singular vector +* matrices are in explicit form. First apply back the left +* singular vector matrices. 
+* + NDB1 = ( ND+1 ) / 2 + DO 10 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 + CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) + CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) + 10 CONTINUE +* +* Next copy the rows of B that correspond to unchanged rows +* in the bidiagonal matrix to BX. +* + DO 20 I = 1, ND + IC = IWORK( INODE+I-1 ) + CALL SCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX ) + 20 CONTINUE +* +* Finally go through the left singular vector matrices of all +* the other subproblems bottom-up on the tree. +* + J = 2**NLVL + SQRE = 0 +* + DO 40 LVL = NLVL, 1, -1 + LVL2 = 2*LVL - 1 +* +* find the first node LF and last node LL on +* the current level LVL +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 30 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + J = J - 1 + CALL SLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX, + $ B( NLF, 1 ), LDB, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK, + $ INFO ) + 30 CONTINUE + 40 CONTINUE + GO TO 90 +* +* ICOMPQ = 1: applying back the right singular vector factors. +* + 50 CONTINUE +* +* First now go through the right singular vector matrices of all +* the tree nodes top-down. +* + J = 0 + DO 70 LVL = 1, NLVL + LVL2 = 2*LVL - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. 
+* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 60 I = LL, LF, -1 + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQRE = 0 + ELSE + SQRE = 1 + END IF + J = J + 1 + CALL SLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB, + $ BX( NLF, 1 ), LDBX, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK, + $ INFO ) + 60 CONTINUE + 70 CONTINUE +* +* The nodes on the bottom level of the tree were solved +* by SLASDQ. The corresponding right singular vector +* matrices are in explicit form. Apply them back. +* + NDB1 = ( ND+1 ) / 2 + DO 80 I = NDB1, ND + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLP1 = NL + 1 + IF( I.EQ.ND ) THEN + NRP1 = NR + ELSE + NRP1 = NR + 1 + END IF + NLF = IC - NL + NRF = IC + 1 + CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) + CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) + 80 CONTINUE +* + 90 CONTINUE +* + RETURN +* +* End of SLALSA +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slalsd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slalsd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,434 @@ + SUBROUTINE SLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, + $ RANK, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL B( LDB, * ), D( * ), E( * ), WORK( * ) +* .. 
+* +* Purpose +* ======= +* +* SLALSD uses the singular value decomposition of A to solve the least +* squares problem of finding X to minimize the Euclidean norm of each +* column of A*X-B, where A is N-by-N upper bidiagonal, and X and B +* are N-by-NRHS. The solution X overwrites B. +* +* The singular values of A smaller than RCOND times the largest +* singular value are treated as zero in solving the least squares +* problem; in this case a minimum norm solution is returned. +* The actual singular values are returned in D in ascending order. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': D and E define an upper bidiagonal matrix. +* = 'L': D and E define a lower bidiagonal matrix. +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The dimension of the bidiagonal matrix. N >= 0. +* +* NRHS (input) INTEGER +* The number of columns of B. NRHS must be at least 1. +* +* D (input/output) REAL array, dimension (N) +* On entry D contains the main diagonal of the bidiagonal +* matrix. On exit, if INFO = 0, D contains its singular values. +* +* E (input/output) REAL array, dimension (N-1) +* Contains the super-diagonal entries of the bidiagonal matrix. +* On exit, E has been destroyed. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On input, B contains the right hand sides of the least +* squares problem. On output, B contains the solution X. +* +* LDB (input) INTEGER +* The leading dimension of B in the calling subprogram. +* LDB must be at least max(1,N). 
+* +* RCOND (input) REAL +* The singular values of A less than or equal to RCOND times +* the largest singular value are treated as zero in solving +* the least squares problem. If RCOND is negative, +* machine precision is used instead. +* For example, if diag(S)*X=B were the least squares problem, +* where diag(S) is a diagonal matrix of singular values, the +* solution would be X(i) = B(i) / S(i) if S(i) is greater than +* RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to +* RCOND*max(S). +* +* RANK (output) INTEGER +* The number of singular values of A greater than RCOND times +* the largest singular value. +* +* WORK (workspace) REAL array, dimension at least +* (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), +* where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). +* +* IWORK (workspace) INTEGER array, dimension at least +* (3*N*NLVL + 11*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an singular value while +* working on the submatrix lying in rows and columns +* INFO/(N+1) through MOD(INFO,N+1). +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + INTEGER BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM, + $ GIVPTR, I, ICMPQ1, ICMPQ2, IWK, J, K, NLVL, + $ NM1, NSIZE, NSUB, NWORK, PERM, POLES, S, SIZEI, + $ SMLSZP, SQRE, ST, ST1, U, VT, Z + REAL CS, EPS, ORGNRM, R, RCND, SN, TOL +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SLANST + EXTERNAL ISAMAX, SLAMCH, SLANST +* .. +* .. External Subroutines .. 
+ EXTERNAL SCOPY, SGEMM, SLACPY, SLALSA, SLARTG, SLASCL, + $ SLASDA, SLASDQ, SLASET, SLASRT, SROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, REAL, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLALSD', -INFO ) + RETURN + END IF +* + EPS = SLAMCH( 'Epsilon' ) +* +* Set up the tolerance. +* + IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN + RCND = EPS + ELSE + RCND = RCOND + END IF +* + RANK = 0 +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + IF( D( 1 ).EQ.ZERO ) THEN + CALL SLASET( 'A', 1, NRHS, ZERO, ZERO, B, LDB ) + ELSE + RANK = 1 + CALL SLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO ) + D( 1 ) = ABS( D( 1 ) ) + END IF + RETURN + END IF +* +* Rotate the matrix if it is lower bidiagonal. +* + IF( UPLO.EQ.'L' ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( NRHS.EQ.1 ) THEN + CALL SROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN ) + ELSE + WORK( I*2-1 ) = CS + WORK( I*2 ) = SN + END IF + 10 CONTINUE + IF( NRHS.GT.1 ) THEN + DO 30 I = 1, NRHS + DO 20 J = 1, N - 1 + CS = WORK( J*2-1 ) + SN = WORK( J*2 ) + CALL SROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN ) + 20 CONTINUE + 30 CONTINUE + END IF + END IF +* +* Scale. +* + NM1 = N - 1 + ORGNRM = SLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) THEN + CALL SLASET( 'A', N, NRHS, ZERO, ZERO, B, LDB ) + RETURN + END IF +* + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO ) +* +* If N is smaller than the minimum divide size SMLSIZ, then solve +* the problem with another solver. 
+* + IF( N.LE.SMLSIZ ) THEN + NWORK = 1 + N*N + CALL SLASET( 'A', N, N, ZERO, ONE, WORK, N ) + CALL SLASDQ( 'U', 0, N, N, 0, NRHS, D, E, WORK, N, WORK, N, B, + $ LDB, WORK( NWORK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + TOL = RCND*ABS( D( ISAMAX( N, D, 1 ) ) ) + DO 40 I = 1, N + IF( D( I ).LE.TOL ) THEN + CALL SLASET( 'A', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + ELSE + CALL SLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ), + $ LDB, INFO ) + RANK = RANK + 1 + END IF + 40 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, + $ WORK( NWORK ), N ) + CALL SLACPY( 'A', N, NRHS, WORK( NWORK ), N, B, LDB ) +* +* Unscale. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL SLASRT( 'D', N, D, INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN + END IF +* +* Book-keeping and setting up some constants. +* + NLVL = INT( LOG( REAL( N ) / REAL( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1 +* + SMLSZP = SMLSIZ + 1 +* + U = 1 + VT = 1 + SMLSIZ*N + DIFL = VT + SMLSZP*N + DIFR = DIFL + NLVL*N + Z = DIFR + NLVL*N*2 + C = Z + NLVL*N + S = C + N + POLES = S + N + GIVNUM = POLES + 2*NLVL*N + BX = GIVNUM + 2*NLVL*N + NWORK = BX + N*NRHS +* + SIZEI = 1 + N + K = SIZEI + N + GIVPTR = K + N + PERM = GIVPTR + N + GIVCOL = PERM + NLVL*N + IWK = GIVCOL + NLVL*N*2 +* + ST = 1 + SQRE = 0 + ICMPQ1 = 1 + ICMPQ2 = 0 + NSUB = 0 +* + DO 50 I = 1, N + IF( ABS( D( I ) ).LT.EPS ) THEN + D( I ) = SIGN( EPS, D( I ) ) + END IF + 50 CONTINUE +* + DO 60 I = 1, NM1 + IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN + NSUB = NSUB + 1 + IWORK( NSUB ) = ST +* +* Subproblem found. First determine its size and then +* apply divide and conquer on it. +* + IF( I.LT.NM1 ) THEN +* +* A subproblem with E(I) small for I < NM1. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE IF( ABS( E( I ) ).GE.EPS ) THEN +* +* A subproblem with E(NM1) not too small but I = NM1. 
+* + NSIZE = N - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE +* +* A subproblem with E(NM1) small. This implies an +* 1-by-1 subproblem at D(N), which is not solved +* explicitly. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + NSUB = NSUB + 1 + IWORK( NSUB ) = N + IWORK( SIZEI+NSUB-1 ) = 1 + CALL SCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N ) + END IF + ST1 = ST - 1 + IF( NSIZE.EQ.1 ) THEN +* +* This is a 1-by-1 subproblem and is not solved +* explicitly. +* + CALL SCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* This is a small subproblem and is solved by SLASDQ. +* + CALL SLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ WORK( VT+ST1 ), N ) + CALL SLASDQ( 'U', 0, NSIZE, NSIZE, 0, NRHS, D( ST ), + $ E( ST ), WORK( VT+ST1 ), N, WORK( NWORK ), + $ N, B( ST, 1 ), LDB, WORK( NWORK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + CALL SLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB, + $ WORK( BX+ST1 ), N ) + ELSE +* +* A large problem. Solve it using divide and conquer. +* + CALL SLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ), + $ E( ST ), WORK( U+ST1 ), N, WORK( VT+ST1 ), + $ IWORK( K+ST1 ), WORK( DIFL+ST1 ), + $ WORK( DIFR+ST1 ), WORK( Z+ST1 ), + $ WORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ), + $ IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ), + $ WORK( GIVNUM+ST1 ), WORK( C+ST1 ), + $ WORK( S+ST1 ), WORK( NWORK ), IWORK( IWK ), + $ INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + BXST = BX + ST1 + CALL SLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ), + $ LDB, WORK( BXST ), N, WORK( U+ST1 ), N, + $ WORK( VT+ST1 ), IWORK( K+ST1 ), + $ WORK( DIFL+ST1 ), WORK( DIFR+ST1 ), + $ WORK( Z+ST1 ), WORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ), + $ WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + ST = I + 1 + END IF + 60 CONTINUE +* +* Apply the singular values and treat the tiny ones as zero. 
+* + TOL = RCND*ABS( D( ISAMAX( N, D, 1 ) ) ) +* + DO 70 I = 1, N +* +* Some of the elements in D can be negative because 1-by-1 +* subproblems were not solved explicitly. +* + IF( ABS( D( I ) ).LE.TOL ) THEN + CALL SLASET( 'A', 1, NRHS, ZERO, ZERO, WORK( BX+I-1 ), N ) + ELSE + RANK = RANK + 1 + CALL SLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, + $ WORK( BX+I-1 ), N, INFO ) + END IF + D( I ) = ABS( D( I ) ) + 70 CONTINUE +* +* Now apply back the right singular vectors. +* + ICMPQ2 = 1 + DO 80 I = 1, NSUB + ST = IWORK( I ) + ST1 = ST - 1 + NSIZE = IWORK( SIZEI+I-1 ) + BXST = BX + ST1 + IF( NSIZE.EQ.1 ) THEN + CALL SCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ WORK( VT+ST1 ), N, WORK( BXST ), N, ZERO, + $ B( ST, 1 ), LDB ) + ELSE + CALL SLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N, + $ B( ST, 1 ), LDB, WORK( U+ST1 ), N, + $ WORK( VT+ST1 ), IWORK( K+ST1 ), + $ WORK( DIFL+ST1 ), WORK( DIFR+ST1 ), + $ WORK( Z+ST1 ), WORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ), + $ WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + 80 CONTINUE +* +* Unscale and sort the singular values. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL SLASRT( 'D', N, D, INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN +* +* End of SLALSD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slamc1.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slamc1.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,183 @@ + SUBROUTINE SLAMC1( BETA, T, RND, IEEE1 ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + LOGICAL IEEE1, RND + INTEGER BETA, T +* .. 
+* +* Purpose +* ======= +* +* SLAMC1 determines the machine parameters given by BETA, T, RND, and +* IEEE1. +* +* Arguments +* ========= +* +* BETA (output) INTEGER +* The base of the machine. +* +* T (output) INTEGER +* The number of ( BETA ) digits in the mantissa. +* +* RND (output) LOGICAL +* Specifies whether proper rounding ( RND = .TRUE. ) or +* chopping ( RND = .FALSE. ) occurs in addition. This may not +* be a reliable guide to the way in which the machine performs +* its arithmetic. +* +* IEEE1 (output) LOGICAL +* Specifies whether rounding appears to be done in the IEEE +* 'round to nearest' style. +* +* Further Details +* =============== +* +* The routine is based on the routine ENVRON by Malcolm and +* incorporates suggestions by Gentleman and Marovich. See +* +* Malcolm M. A. (1972) Algorithms to reveal properties of +* floating-point arithmetic. Comms. of the ACM, 15, 949-951. +* +* Gentleman W. M. and Marovich S. B. (1974) More on algorithms +* that reveal properties of floating point arithmetic units. +* Comms. of the ACM, 17, 276-277. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL FIRST, LIEEE1, LRND + INTEGER LBETA, LT + REAL A, B, C, F, ONE, QTR, SAVEC, T1, T2 +* .. +* .. External Functions .. + REAL SLAMC3 + EXTERNAL SLAMC3 +* .. +* .. Save statement .. + SAVE FIRST, LIEEE1, LBETA, LRND, LT +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + ONE = 1 +* +* LBETA, LIEEE1, LT and LRND are the local values of BETA, +* IEEE1, T and RND. +* +* Throughout this routine we use the function SLAMC3 to ensure +* that relevant values are stored and not held in registers, or +* are not affected by optimizers. +* +* Compute a = 2.0**m with the smallest positive integer m such +* that +* +* fl( a + 1.0 ) = a. 
+* + A = 1 + C = 1 +* +*+ WHILE( C.EQ.ONE )LOOP + 10 CONTINUE + IF( C.EQ.ONE ) THEN + A = 2*A + C = SLAMC3( A, ONE ) + C = SLAMC3( C, -A ) + GO TO 10 + END IF +*+ END WHILE +* +* Now compute b = 2.0**m with the smallest positive integer m +* such that +* +* fl( a + b ) .gt. a. +* + B = 1 + C = SLAMC3( A, B ) +* +*+ WHILE( C.EQ.A )LOOP + 20 CONTINUE + IF( C.EQ.A ) THEN + B = 2*B + C = SLAMC3( A, B ) + GO TO 20 + END IF +*+ END WHILE +* +* Now compute the base. a and c are neighbouring floating point +* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so +* their difference is beta. Adding 0.25 to c is to ensure that it +* is truncated to beta and not ( beta - 1 ). +* + QTR = ONE / 4 + SAVEC = C + C = SLAMC3( C, -A ) + LBETA = C + QTR +* +* Now determine whether rounding or chopping occurs, by adding a +* bit less than beta/2 and a bit more than beta/2 to a. +* + B = LBETA + F = SLAMC3( B / 2, -B / 100 ) + C = SLAMC3( F, A ) + IF( C.EQ.A ) THEN + LRND = .TRUE. + ELSE + LRND = .FALSE. + END IF + F = SLAMC3( B / 2, B / 100 ) + C = SLAMC3( F, A ) + IF( ( LRND ) .AND. ( C.EQ.A ) ) + $ LRND = .FALSE. +* +* Try and decide whether rounding is done in the IEEE 'round to +* nearest' style. B/2 is half a unit in the last place of the two +* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit +* zero, and SAVEC is odd. Thus adding B/2 to A should not change +* A, but adding B/2 to SAVEC should change SAVEC. +* + T1 = SLAMC3( B / 2, A ) + T2 = SLAMC3( B / 2, SAVEC ) + LIEEE1 = ( T1.EQ.A ) .AND. ( T2.GT.SAVEC ) .AND. LRND +* +* Now find the mantissa, t. It should be the integer part of +* log to the base beta of a, however it is safer to determine t +* by powering. So we find t as the smallest positive integer for +* which +* +* fl( beta**t + 1.0 ) = 1.0. 
+* + LT = 0 + A = 1 + C = 1 +* +*+ WHILE( C.EQ.ONE )LOOP + 30 CONTINUE + IF( C.EQ.ONE ) THEN + LT = LT + 1 + A = A*LBETA + C = SLAMC3( A, ONE ) + C = SLAMC3( C, -A ) + GO TO 30 + END IF +*+ END WHILE +* + END IF +* + BETA = LBETA + T = LT + RND = LRND + IEEE1 = LIEEE1 + FIRST = .FALSE. + RETURN +* +* End of SLAMC1 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slamc2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slamc2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,255 @@ + SUBROUTINE SLAMC2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + LOGICAL RND + INTEGER BETA, EMAX, EMIN, T + REAL EPS, RMAX, RMIN +* .. +* +* Purpose +* ======= +* +* SLAMC2 determines the machine parameters specified in its argument +* list. +* +* Arguments +* ========= +* +* BETA (output) INTEGER +* The base of the machine. +* +* T (output) INTEGER +* The number of ( BETA ) digits in the mantissa. +* +* RND (output) LOGICAL +* Specifies whether proper rounding ( RND = .TRUE. ) or +* chopping ( RND = .FALSE. ) occurs in addition. This may not +* be a reliable guide to the way in which the machine performs +* its arithmetic. +* +* EPS (output) REAL +* The smallest positive number such that +* +* fl( 1.0 - EPS ) .LT. 1.0, +* +* where fl denotes the computed value. +* +* EMIN (output) INTEGER +* The minimum exponent before (gradual) underflow occurs. +* +* RMIN (output) REAL +* The smallest normalized number for the machine, given by +* BASE**( EMIN - 1 ), where BASE is the floating point value +* of BETA. +* +* EMAX (output) INTEGER +* The maximum exponent before overflow occurs. +* +* RMAX (output) REAL +* The largest positive number for the machine, given by +* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point +* value of BETA. 
+* +* Further Details +* =============== +* +* The computation of EPS is based on a routine PARANOIA by +* W. Kahan of the University of California at Berkeley. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL FIRST, IEEE, IWARN, LIEEE1, LRND + INTEGER GNMIN, GPMIN, I, LBETA, LEMAX, LEMIN, LT, + $ NGNMIN, NGPMIN + REAL A, B, C, HALF, LEPS, LRMAX, LRMIN, ONE, RBASE, + $ SIXTH, SMALL, THIRD, TWO, ZERO +* .. +* .. External Functions .. + REAL SLAMC3 + EXTERNAL SLAMC3 +* .. +* .. External Subroutines .. + EXTERNAL SLAMC1, SLAMC4, SLAMC5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Save statement .. + SAVE FIRST, IWARN, LBETA, LEMAX, LEMIN, LEPS, LRMAX, + $ LRMIN, LT +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / , IWARN / .FALSE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + ZERO = 0 + ONE = 1 + TWO = 2 +* +* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of +* BETA, T, RND, EPS, EMIN and RMIN. +* +* Throughout this routine we use the function SLAMC3 to ensure +* that relevant values are stored and not held in registers, or +* are not affected by optimizers. +* +* SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. +* + CALL SLAMC1( LBETA, LT, LRND, LIEEE1 ) +* +* Start to find EPS. +* + B = LBETA + A = B**( -LT ) + LEPS = A +* +* Try some tricks to see whether or not this is the correct EPS. +* + B = TWO / 3 + HALF = ONE / 2 + SIXTH = SLAMC3( B, -HALF ) + THIRD = SLAMC3( SIXTH, SIXTH ) + B = SLAMC3( THIRD, -HALF ) + B = SLAMC3( B, SIXTH ) + B = ABS( B ) + IF( B.LT.LEPS ) + $ B = LEPS +* + LEPS = 1 +* +*+ WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP + 10 CONTINUE + IF( ( LEPS.GT.B ) .AND. 
( B.GT.ZERO ) ) THEN + LEPS = B + C = SLAMC3( HALF*LEPS, ( TWO**5 )*( LEPS**2 ) ) + C = SLAMC3( HALF, -C ) + B = SLAMC3( HALF, C ) + C = SLAMC3( HALF, -B ) + B = SLAMC3( HALF, C ) + GO TO 10 + END IF +*+ END WHILE +* + IF( A.LT.LEPS ) + $ LEPS = A +* +* Computation of EPS complete. +* +* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). +* Keep dividing A by BETA until (gradual) underflow occurs. This +* is detected when we cannot recover the previous A. +* + RBASE = ONE / LBETA + SMALL = ONE + DO 20 I = 1, 3 + SMALL = SLAMC3( SMALL*RBASE, ZERO ) + 20 CONTINUE + A = SLAMC3( ONE, SMALL ) + CALL SLAMC4( NGPMIN, ONE, LBETA ) + CALL SLAMC4( NGNMIN, -ONE, LBETA ) + CALL SLAMC4( GPMIN, A, LBETA ) + CALL SLAMC4( GNMIN, -A, LBETA ) + IEEE = .FALSE. +* + IF( ( NGPMIN.EQ.NGNMIN ) .AND. ( GPMIN.EQ.GNMIN ) ) THEN + IF( NGPMIN.EQ.GPMIN ) THEN + LEMIN = NGPMIN +* ( Non twos-complement machines, no gradual underflow; +* e.g., VAX ) + ELSE IF( ( GPMIN-NGPMIN ).EQ.3 ) THEN + LEMIN = NGPMIN - 1 + LT + IEEE = .TRUE. +* ( Non twos-complement machines, with gradual underflow; +* e.g., IEEE standard followers ) + ELSE + LEMIN = MIN( NGPMIN, GPMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE IF( ( NGPMIN.EQ.GPMIN ) .AND. ( NGNMIN.EQ.GNMIN ) ) THEN + IF( ABS( NGPMIN-NGNMIN ).EQ.1 ) THEN + LEMIN = MAX( NGPMIN, NGNMIN ) +* ( Twos-complement machines, no gradual underflow; +* e.g., CYBER 205 ) + ELSE + LEMIN = MIN( NGPMIN, NGNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE IF( ( ABS( NGPMIN-NGNMIN ).EQ.1 ) .AND. + $ ( GPMIN.EQ.GNMIN ) ) THEN + IF( ( GPMIN-MIN( NGPMIN, NGNMIN ) ).EQ.3 ) THEN + LEMIN = MAX( NGPMIN, NGNMIN ) - 1 + LT +* ( Twos-complement machines with gradual underflow; +* no known machine ) + ELSE + LEMIN = MIN( NGPMIN, NGNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE + LEMIN = MIN( NGPMIN, NGNMIN, GPMIN, GNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. 
+ END IF + FIRST = .FALSE. +*** +* Comment out this if block if EMIN is ok + IF( IWARN ) THEN + FIRST = .TRUE. + WRITE( 6, FMT = 9999 )LEMIN + END IF +*** +* +* Assume IEEE arithmetic if we found denormalised numbers above, +* or if arithmetic seems to round in the IEEE style, determined +* in routine SLAMC1. A true IEEE machine should have both things +* true; however, faulty machines may have one or the other. +* + IEEE = IEEE .OR. LIEEE1 +* +* Compute RMIN by successive division by BETA. We could compute +* RMIN as BASE**( EMIN - 1 ), but some machines underflow during +* this computation. +* + LRMIN = 1 + DO 30 I = 1, 1 - LEMIN + LRMIN = SLAMC3( LRMIN*RBASE, ZERO ) + 30 CONTINUE +* +* Finally, call SLAMC5 to compute EMAX and RMAX. +* + CALL SLAMC5( LBETA, LT, LEMIN, IEEE, LEMAX, LRMAX ) + END IF +* + BETA = LBETA + T = LT + RND = LRND + EPS = LEPS + EMIN = LEMIN + RMIN = LRMIN + EMAX = LEMAX + RMAX = LRMAX +* + RETURN +* + 9999 FORMAT( / / ' WARNING. The value EMIN may be incorrect:-', + $ ' EMIN = ', I8, / + $ ' If, after inspection, the value EMIN looks', + $ ' acceptable please comment out ', + $ / ' the IF block as marked within the code of routine', + $ ' SLAMC2,', / ' otherwise supply EMIN explicitly.', / ) +* +* End of SLAMC2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slamc3.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slamc3.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,35 @@ + REAL FUNCTION SLAMC3( A, B ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + REAL A, B +* .. +* +* Purpose +* ======= +* +* SLAMC3 is intended to force A and B to be stored prior to doing +* the addition of A and B , for use in situations where optimizers +* might hold one of these in a register. +* +* Arguments +* ========= +* +* A (input) REAL +* B (input) REAL +* The values A and B. 
+* +* ===================================================================== +* +* .. Executable Statements .. +* + SLAMC3 = A + B +* + RETURN +* +* End of SLAMC3 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slamc4.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slamc4.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,81 @@ + SUBROUTINE SLAMC4( EMIN, START, BASE ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER BASE + INTEGER EMIN + REAL START +* .. +* +* Purpose +* ======= +* +* SLAMC4 is a service routine for SLAMC2. +* +* Arguments +* ========= +* +* EMIN (output) INTEGER +* The minimum exponent before (gradual) underflow, computed by +* setting A = START and dividing by BASE until the previous A +* can not be recovered. +* +* START (input) REAL +* The starting point for determining EMIN. +* +* BASE (input) INTEGER +* The base of the machine. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I + REAL A, B1, B2, C1, C2, D1, D2, ONE, RBASE, ZERO +* .. +* .. External Functions .. + REAL SLAMC3 + EXTERNAL SLAMC3 +* .. +* .. Executable Statements .. +* + A = START + ONE = 1 + RBASE = ONE / BASE + ZERO = 0 + EMIN = 1 + B1 = SLAMC3( A*RBASE, ZERO ) + C1 = A + C2 = A + D1 = A + D2 = A +*+ WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. +* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP + 10 CONTINUE + IF( ( C1.EQ.A ) .AND. ( C2.EQ.A ) .AND. ( D1.EQ.A ) .AND. 
+ $ ( D2.EQ.A ) ) THEN + EMIN = EMIN - 1 + A = B1 + B1 = SLAMC3( A / BASE, ZERO ) + C1 = SLAMC3( B1*BASE, ZERO ) + D1 = ZERO + DO 20 I = 1, BASE + D1 = D1 + B1 + 20 CONTINUE + B2 = SLAMC3( A*RBASE, ZERO ) + C2 = SLAMC3( B2 / RBASE, ZERO ) + D2 = ZERO + DO 30 I = 1, BASE + D2 = D2 + B2 + 30 CONTINUE + GO TO 10 + END IF +*+ END WHILE +* + RETURN +* +* End of SLAMC4 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slamc5.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slamc5.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,158 @@ + SUBROUTINE SLAMC5( BETA, P, EMIN, IEEE, EMAX, RMAX ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER BETA, EMAX, EMIN, P + REAL RMAX +* .. +* +* Purpose +* ======= +* +* SLAMC5 attempts to compute RMAX, the largest machine floating-point +* number, without overflow. It assumes that EMAX + abs(EMIN) sum +* approximately to a power of 2. It will fail on machines where this +* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, +* EMAX = 28718). It will also fail if the value supplied for EMIN is +* too large (i.e. too close to zero), probably with overflow. +* +* Arguments +* ========= +* +* BETA (input) INTEGER +* The base of floating-point arithmetic. +* +* P (input) INTEGER +* The number of base BETA digits in the mantissa of a +* floating-point value. +* +* EMIN (input) INTEGER +* The minimum exponent before (gradual) underflow. +* +* IEEE (input) LOGICAL +* A logical flag specifying whether or not the arithmetic +* system is thought to comply with the IEEE standard. +* +* EMAX (output) INTEGER +* The largest exponent before overflow +* +* RMAX (output) REAL +* The largest machine floating-point number. +* +* ===================================================================== +* +* .. Parameters .. 
+ REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER EXBITS, EXPSUM, I, LEXP, NBITS, TRY, UEXP + REAL OLDY, RECBAS, Y, Z +* .. +* .. External Functions .. + REAL SLAMC3 + EXTERNAL SLAMC3 +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* First compute LEXP and UEXP, two powers of 2 that bound +* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum +* approximately to the bound that is closest to abs(EMIN). +* (EMAX is the exponent of the required number RMAX). +* + LEXP = 1 + EXBITS = 1 + 10 CONTINUE + TRY = LEXP*2 + IF( TRY.LE.( -EMIN ) ) THEN + LEXP = TRY + EXBITS = EXBITS + 1 + GO TO 10 + END IF + IF( LEXP.EQ.-EMIN ) THEN + UEXP = LEXP + ELSE + UEXP = TRY + EXBITS = EXBITS + 1 + END IF +* +* Now -LEXP is less than or equal to EMIN, and -UEXP is greater +* than or equal to EMIN. EXBITS is the number of bits needed to +* store the exponent. +* + IF( ( UEXP+EMIN ).GT.( -LEXP-EMIN ) ) THEN + EXPSUM = 2*LEXP + ELSE + EXPSUM = 2*UEXP + END IF +* +* EXPSUM is the exponent range, approximately equal to +* EMAX - EMIN + 1 . +* + EMAX = EXPSUM + EMIN - 1 + NBITS = 1 + EXBITS + P +* +* NBITS is the total number of bits needed to store a +* floating-point number. +* + IF( ( MOD( NBITS, 2 ).EQ.1 ) .AND. ( BETA.EQ.2 ) ) THEN +* +* Either there are an odd number of bits used to store a +* floating-point number, which is unlikely, or some bits are +* not used in the representation of numbers, which is possible, +* (e.g. Cray machines) or the mantissa has an implicit bit, +* (e.g. IEEE machines, Dec Vax machines), which is perhaps the +* most likely. We have to assume the last alternative. +* If this is true, then we need to reduce EMAX by one because +* there must be some way of representing zero in an implicit-bit +* system. On machines like Cray, we are reducing EMAX by one +* unnecessarily. 
+* + EMAX = EMAX - 1 + END IF +* + IF( IEEE ) THEN +* +* Assume we are on an IEEE machine which reserves one exponent +* for infinity and NaN. +* + EMAX = EMAX - 1 + END IF +* +* Now create RMAX, the largest machine number, which should +* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . +* +* First compute 1.0 - BETA**(-P), being careful that the +* result is less than 1.0 . +* + RECBAS = ONE / BETA + Z = BETA - ONE + Y = ZERO + DO 20 I = 1, P + Z = Z*RECBAS + IF( Y.LT.ONE ) + $ OLDY = Y + Y = SLAMC3( Y, Z ) + 20 CONTINUE + IF( Y.GE.ONE ) + $ Y = OLDY +* +* Now multiply by BETA**EMAX to get RMAX. +* + DO 30 I = 1, EMAX + Y = SLAMC3( Y*BETA, ZERO ) + 30 CONTINUE +* + RMAX = Y + RETURN +* +* End of SLAMC5 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slamch.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slamch.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,126 @@ + REAL FUNCTION SLAMCH( CMACH ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER CMACH +* .. +* +* Purpose +* ======= +* +* SLAMCH determines single precision machine parameters. 
+* +* Arguments +* ========= +* +* CMACH (input) CHARACTER*1 +* Specifies the value to be returned by SLAMCH: +* = 'E' or 'e', SLAMCH := eps +* = 'S' or 's , SLAMCH := sfmin +* = 'B' or 'b', SLAMCH := base +* = 'P' or 'p', SLAMCH := eps*base +* = 'N' or 'n', SLAMCH := t +* = 'R' or 'r', SLAMCH := rnd +* = 'M' or 'm', SLAMCH := emin +* = 'U' or 'u', SLAMCH := rmin +* = 'L' or 'l', SLAMCH := emax +* = 'O' or 'o', SLAMCH := rmax +* +* where +* +* eps = relative machine precision +* sfmin = safe minimum, such that 1/sfmin does not overflow +* base = base of the machine +* prec = eps*base +* t = number of (base) digits in the mantissa +* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise +* emin = minimum exponent before (gradual) underflow +* rmin = underflow threshold - base**(emin-1) +* emax = largest exponent before overflow +* rmax = overflow threshold - (base**emax)*(1-eps) +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL FIRST, LRND + INTEGER BETA, IMAX, IMIN, IT + REAL BASE, EMAX, EMIN, EPS, PREC, RMACH, RMAX, RMIN, + $ RND, SFMIN, SMALL, T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLAMC2 +* .. +* .. Save statement .. + SAVE FIRST, EPS, SFMIN, BASE, T, RND, EMIN, RMIN, + $ EMAX, RMAX, PREC +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + CALL SLAMC2( BETA, IT, LRND, EPS, IMIN, RMIN, IMAX, RMAX ) + BASE = BETA + T = IT + IF( LRND ) THEN + RND = ONE + EPS = ( BASE**( 1-IT ) ) / 2 + ELSE + RND = ZERO + EPS = BASE**( 1-IT ) + END IF + PREC = EPS*BASE + EMIN = IMIN + EMAX = IMAX + SFMIN = RMIN + SMALL = ONE / RMAX + IF( SMALL.GE.SFMIN ) THEN +* +* Use SMALL plus a bit, to avoid the possibility of rounding +* causing overflow when computing 1/sfmin. 
+* + SFMIN = SMALL*( ONE+EPS ) + END IF + END IF +* + IF( LSAME( CMACH, 'E' ) ) THEN + RMACH = EPS + ELSE IF( LSAME( CMACH, 'S' ) ) THEN + RMACH = SFMIN + ELSE IF( LSAME( CMACH, 'B' ) ) THEN + RMACH = BASE + ELSE IF( LSAME( CMACH, 'P' ) ) THEN + RMACH = PREC + ELSE IF( LSAME( CMACH, 'N' ) ) THEN + RMACH = T + ELSE IF( LSAME( CMACH, 'R' ) ) THEN + RMACH = RND + ELSE IF( LSAME( CMACH, 'M' ) ) THEN + RMACH = EMIN + ELSE IF( LSAME( CMACH, 'U' ) ) THEN + RMACH = RMIN + ELSE IF( LSAME( CMACH, 'L' ) ) THEN + RMACH = EMAX + ELSE IF( LSAME( CMACH, 'O' ) ) THEN + RMACH = RMAX + END IF +* + SLAMCH = RMACH + FIRST = .FALSE. + RETURN +* +* End of SLAMCH +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slamrg.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slamrg.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,103 @@ + SUBROUTINE SLAMRG( N1, N2, A, STRD1, STRD2, INDEX ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER N1, N2, STRD1, STRD2 +* .. +* .. Array Arguments .. + INTEGER INDEX( * ) + REAL A( * ) +* .. +* +* Purpose +* ======= +* +* SLAMRG will create a permutation list which will merge the elements +* of A (which is composed of two independently sorted sets) into a +* single set which is sorted in ascending order. +* +* Arguments +* ========= +* +* N1 (input) INTEGER +* N2 (input) INTEGER +* These arguements contain the respective lengths of the two +* sorted lists to be merged. +* +* A (input) REAL array, dimension (N1+N2) +* The first N1 elements of A contain a list of numbers which +* are sorted in either ascending or descending order. Likewise +* for the final N2 elements. +* +* STRD1 (input) INTEGER +* STRD2 (input) INTEGER +* These are the strides to be taken through the array A. +* Allowable strides are 1 and -1. They indicate whether a +* subset of A is sorted in ascending (STRDx = 1) or descending +* (STRDx = -1) order. 
+* +* INDEX (output) INTEGER array, dimension (N1+N2) +* On exit this array will contain a permutation such that +* if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be +* sorted in ascending order. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IND1, IND2, N1SV, N2SV +* .. +* .. Executable Statements .. +* + N1SV = N1 + N2SV = N2 + IF( STRD1.GT.0 ) THEN + IND1 = 1 + ELSE + IND1 = N1 + END IF + IF( STRD2.GT.0 ) THEN + IND2 = 1 + N1 + ELSE + IND2 = N1 + N2 + END IF + I = 1 +* while ( (N1SV > 0) & (N2SV > 0) ) + 10 CONTINUE + IF( N1SV.GT.0 .AND. N2SV.GT.0 ) THEN + IF( A( IND1 ).LE.A( IND2 ) ) THEN + INDEX( I ) = IND1 + I = I + 1 + IND1 = IND1 + STRD1 + N1SV = N1SV - 1 + ELSE + INDEX( I ) = IND2 + I = I + 1 + IND2 = IND2 + STRD2 + N2SV = N2SV - 1 + END IF + GO TO 10 + END IF +* end while + IF( N1SV.EQ.0 ) THEN + DO 20 N1SV = 1, N2SV + INDEX( I ) = IND2 + I = I + 1 + IND2 = IND2 + STRD2 + 20 CONTINUE + ELSE +* N2SV .EQ. 0 + DO 30 N2SV = 1, N1SV + INDEX( I ) = IND1 + I = I + 1 + IND1 = IND1 + STRD1 + 30 CONTINUE + END IF +* + RETURN +* +* End of SLAMRG +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slange.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slange.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,144 @@ + REAL FUNCTION SLANGE( NORM, M, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLANGE returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* real matrix A. 
+* +* Description +* =========== +* +* SLANGE returns the value +* +* SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANGE as described +* above. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. When M = 0, +* SLANGE is set to zero. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. When N = 0, +* SLANGE is set to zero. +* +* A (input) REAL array, dimension (LDA,N) +* The m by n matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* WORK (workspace) REAL array, dimension (MAX(1,LWORK)), +* where LWORK >= M when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). 
+* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, M + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, M + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, M + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL SLASSQ( M, A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANGE = VALUE + RETURN +* +* End of SLANGE +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slanhs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slanhs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,141 @@ + REAL FUNCTION SLANHS( NORM, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLANHS returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* Hessenberg matrix A. +* +* Description +* =========== +* +* SLANHS returns the value +* +* SLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. 
+* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANHS as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, SLANHS is +* set to zero. +* +* A (input) REAL array, dimension (LDA,N) +* The n by n upper Hessenberg matrix A; the part of A below the +* first sub-diagonal is not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* WORK (workspace) REAL array, dimension (MAX(1,LWORK)), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, MIN( N, J+1 ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, MIN( N, J+1 ) + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, MIN( N, J+1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). 
+* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL SLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANHS = VALUE + RETURN +* +* End of SLANHS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slanst.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slanst.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,124 @@ + REAL FUNCTION SLANST( NORM, N, D, E ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* SLANST returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* real symmetric tridiagonal matrix A. +* +* Description +* =========== +* +* SLANST returns the value +* +* SLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANST as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, SLANST is +* set to zero. +* +* D (input) REAL array, dimension (N) +* The diagonal elements of A. +* +* E (input) REAL array, dimension (N-1) +* The (n-1) sub-diagonal or super-diagonal elements of A. +* +* ===================================================================== +* +* .. Parameters .. 
+ REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL ANORM, SCALE, SUM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) ) ) + ANORM = MAX( ANORM, ABS( E( I ) ) ) + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR. + $ LSAME( NORM, 'I' ) ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ), + $ ABS( E( N-1 ) )+ABS( D( N ) ) ) + DO 20 I = 2, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) )+ABS( E( I ) )+ + $ ABS( E( I-1 ) ) ) + 20 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( N.GT.1 ) THEN + CALL SLASSQ( N-1, E, 1, SCALE, SUM ) + SUM = 2*SUM + END IF + CALL SLASSQ( N, D, 1, SCALE, SUM ) + ANORM = SCALE*SQRT( SUM ) + END IF +* + SLANST = ANORM + RETURN +* +* End of SLANST +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slansy.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slansy.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,173 @@ + REAL FUNCTION SLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLANSY returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* real symmetric matrix A. 
+* +* Description +* =========== +* +* SLANSY returns the value +* +* SLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANSY as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is to be referenced. +* = 'U': Upper triangular part of A is referenced +* = 'L': Lower triangular part of A is referenced +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, SLANSY is +* set to zero. +* +* A (input) REAL array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading n by n +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading n by n lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* WORK (workspace) REAL array, dimension (MAX(1,LWORK)), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. 
External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J, N + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( A( J, J ) ) + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( A( J, J ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). 
+* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL SLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL SLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + CALL SLASSQ( N, A, LDA+1, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANSY = VALUE + RETURN +* +* End of SLANSY +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slantr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slantr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,276 @@ + REAL FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLANTR returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* trapezoidal or triangular matrix A. +* +* Description +* =========== +* +* SLANTR returns the value +* +* SLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANTR as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower trapezoidal. 
+* = 'U': Upper trapezoidal +* = 'L': Lower trapezoidal +* Note that A is triangular instead of trapezoidal if M = N. +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A has unit diagonal. +* = 'N': Non-unit diagonal +* = 'U': Unit diagonal +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0, and if +* UPLO = 'U', M <= N. When M = 0, SLANTR is set to zero. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0, and if +* UPLO = 'L', N <= M. When N = 0, SLANTR is set to zero. +* +* A (input) REAL array, dimension (LDA,N) +* The trapezoidal matrix A (A is triangular if M = N). +* If UPLO = 'U', the leading m by n upper trapezoidal part of +* the array A contains the upper trapezoidal matrix, and the +* strictly lower triangular part of A is not referenced. +* If UPLO = 'L', the leading m by n lower trapezoidal part of +* the array A contains the lower trapezoidal matrix, and the +* strictly upper triangular part of A is not referenced. Note +* that when DIAG = 'U', the diagonal elements of A are not +* referenced and are assumed to be one. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* WORK (workspace) REAL array, dimension (MAX(1,LWORK)), +* where LWORK >= M when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). 
+* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( M, J-1 ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J + 1, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = 1, MIN( M, J ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = J, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN + SUM = ONE + DO 90 I = 1, J - 1 + SUM = SUM + ABS( A( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = 1, MIN( M, J ) + SUM = SUM + ABS( A( I, J ) ) + 100 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = J + 1, M + SUM = SUM + ABS( A( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = J, M + SUM = SUM + ABS( A( I, J ) ) + 130 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). 
+* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, M + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, MIN( M, J-1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, M + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, MIN( M, J ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 220 I = N + 1, M + WORK( I ) = ZERO + 220 CONTINUE + DO 240 J = 1, N + DO 230 I = J + 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 230 CONTINUE + 240 CONTINUE + ELSE + DO 250 I = 1, M + WORK( I ) = ZERO + 250 CONTINUE + DO 270 J = 1, N + DO 260 I = J, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 260 CONTINUE + 270 CONTINUE + END IF + END IF + VALUE = ZERO + DO 280 I = 1, M + VALUE = MAX( VALUE, WORK( I ) ) + 280 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). 
+* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 290 J = 2, N + CALL SLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) + 290 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 300 J = 1, N + CALL SLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) + 300 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 310 J = 1, N + CALL SLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 320 J = 1, N + CALL SLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) + 320 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANTR = VALUE + RETURN +* +* End of SLANTR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slanv2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slanv2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,205 @@ + SUBROUTINE SLANV2( A, B, C, D, RT1R, RT1I, RT2R, RT2I, CS, SN ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + REAL A, B, C, CS, D, RT1I, RT1R, RT2I, RT2R, SN +* .. +* +* Purpose +* ======= +* +* SLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric +* matrix in standard form: +* +* [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] +* [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] +* +* where either +* 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or +* 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex +* conjugate eigenvalues. +* +* Arguments +* ========= +* +* A (input/output) REAL +* B (input/output) REAL +* C (input/output) REAL +* D (input/output) REAL +* On entry, the elements of the input matrix. +* On exit, they are overwritten by the elements of the +* standardised Schur form. +* +* RT1R (output) REAL +* RT1I (output) REAL +* RT2R (output) REAL +* RT2I (output) REAL +* The real and imaginary parts of the eigenvalues. 
If the +* eigenvalues are a complex conjugate pair, RT1I > 0. +* +* CS (output) REAL +* SN (output) REAL +* Parameters of the rotation matrix. +* +* Further Details +* =============== +* +* Modified by V. Sima, Research Institute for Informatics, Bucharest, +* Romania, to reduce the risk of cancellation errors, +* when computing real eigenvalues, and to ensure, if possible, that +* abs(RT1R) >= abs(RT2R). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) + REAL MULTPL + PARAMETER ( MULTPL = 4.0E+0 ) +* .. +* .. Local Scalars .. + REAL AA, BB, BCMAX, BCMIS, CC, CS1, DD, EPS, P, SAB, + $ SAC, SCALE, SIGMA, SN1, TAU, TEMP, Z +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* + EPS = SLAMCH( 'P' ) + IF( C.EQ.ZERO ) THEN + CS = ONE + SN = ZERO + GO TO 10 +* + ELSE IF( B.EQ.ZERO ) THEN +* +* Swap rows and columns +* + CS = ZERO + SN = ONE + TEMP = D + D = A + A = TEMP + B = -C + C = ZERO + GO TO 10 + ELSE IF( (A-D).EQ.ZERO .AND. SIGN( ONE, B ).NE. + $ SIGN( ONE, C ) ) THEN + CS = ONE + SN = ZERO + GO TO 10 + ELSE +* + TEMP = A - D + P = HALF*TEMP + BCMAX = MAX( ABS( B ), ABS( C ) ) + BCMIS = MIN( ABS( B ), ABS( C ) )*SIGN( ONE, B )*SIGN( ONE, C ) + SCALE = MAX( ABS( P ), BCMAX ) + Z = ( P / SCALE )*P + ( BCMAX / SCALE )*BCMIS +* +* If Z is of the order of the machine accuracy, postpone the +* decision on the nature of eigenvalues +* + IF( Z.GE.MULTPL*EPS ) THEN +* +* Real eigenvalues. Compute A and D. +* + Z = P + SIGN( SQRT( SCALE )*SQRT( Z ), P ) + A = D + Z + D = D - ( BCMAX / Z )*BCMIS +* +* Compute B and the rotation matrix +* + TAU = SLAPY2( C, Z ) + CS = Z / TAU + SN = C / TAU + B = B - C + C = ZERO + ELSE +* +* Complex eigenvalues, or real (almost) equal eigenvalues. 
+* Make diagonal elements equal. +* + SIGMA = B + C + TAU = SLAPY2( SIGMA, TEMP ) + CS = SQRT( HALF*( ONE+ABS( SIGMA ) / TAU ) ) + SN = -( P / ( TAU*CS ) )*SIGN( ONE, SIGMA ) +* +* Compute [ AA BB ] = [ A B ] [ CS -SN ] +* [ CC DD ] [ C D ] [ SN CS ] +* + AA = A*CS + B*SN + BB = -A*SN + B*CS + CC = C*CS + D*SN + DD = -C*SN + D*CS +* +* Compute [ A B ] = [ CS SN ] [ AA BB ] +* [ C D ] [-SN CS ] [ CC DD ] +* + A = AA*CS + CC*SN + B = BB*CS + DD*SN + C = -AA*SN + CC*CS + D = -BB*SN + DD*CS +* + TEMP = HALF*( A+D ) + A = TEMP + D = TEMP +* + IF( C.NE.ZERO ) THEN + IF( B.NE.ZERO ) THEN + IF( SIGN( ONE, B ).EQ.SIGN( ONE, C ) ) THEN +* +* Real eigenvalues: reduce to upper triangular form +* + SAB = SQRT( ABS( B ) ) + SAC = SQRT( ABS( C ) ) + P = SIGN( SAB*SAC, C ) + TAU = ONE / SQRT( ABS( B+C ) ) + A = TEMP + P + D = TEMP - P + B = B - C + C = ZERO + CS1 = SAB*TAU + SN1 = SAC*TAU + TEMP = CS*CS1 - SN*SN1 + SN = CS*SN1 + SN*CS1 + CS = TEMP + END IF + ELSE + B = -C + C = ZERO + TEMP = CS + CS = -SN + SN = TEMP + END IF + END IF + END IF +* + END IF +* + 10 CONTINUE +* +* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). +* + RT1R = A + RT2R = D + IF( C.EQ.ZERO ) THEN + RT1I = ZERO + RT2I = ZERO + ELSE + RT1I = SQRT( ABS( B ) )*SQRT( ABS( C ) ) + RT2I = -RT1I + END IF + RETURN +* +* End of SLANV2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slapy2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slapy2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,53 @@ + REAL FUNCTION SLAPY2( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + REAL X, Y +* .. +* +* Purpose +* ======= +* +* SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary +* overflow. +* +* Arguments +* ========= +* +* X (input) REAL +* Y (input) REAL +* X and Y specify the values x and y. 
+* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + REAL W, XABS, YABS, Z +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + XABS = ABS( X ) + YABS = ABS( Y ) + W = MAX( XABS, YABS ) + Z = MIN( XABS, YABS ) + IF( Z.EQ.ZERO ) THEN + SLAPY2 = W + ELSE + SLAPY2 = W*SQRT( ONE+( Z / W )**2 ) + END IF + RETURN +* +* End of SLAPY2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slapy3.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slapy3.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,56 @@ + REAL FUNCTION SLAPY3( X, Y, Z ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + REAL X, Y, Z +* .. +* +* Purpose +* ======= +* +* SLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause +* unnecessary overflow. +* +* Arguments +* ========= +* +* X (input) REAL +* Y (input) REAL +* Z (input) REAL +* X, Y and Z specify the values x, y and z. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + REAL W, XABS, YABS, ZABS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + XABS = ABS( X ) + YABS = ABS( Y ) + ZABS = ABS( Z ) + W = MAX( XABS, YABS, ZABS ) + IF( W.EQ.ZERO ) THEN +* W can be zero for max(0,nan,0) +* adding all three entries together will make sure +* NaN will not disappear. 
+ SLAPY3 = XABS + YABS + ZABS + ELSE + SLAPY3 = W*SQRT( ( XABS / W )**2+( YABS / W )**2+ + $ ( ZABS / W )**2 ) + END IF + RETURN +* +* End of SLAPY3 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaqp2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaqp2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,175 @@ + SUBROUTINE SLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER LDA, M, N, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), TAU( * ), VN1( * ), VN2( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLAQP2 computes a QR factorization with column pivoting of +* the block A(OFFSET+1:M,1:N). +* The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* OFFSET (input) INTEGER +* The number of rows of the matrix A that must be pivoted +* but no factorized. OFFSET >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of block A(OFFSET+1:M,1:N) is +* the triangular factor obtained; the elements in block +* A(OFFSET+1:M,1:N) below the diagonal, together with the +* array TAU, represent the orthogonal matrix Q as a product of +* elementary reflectors. Block A(1:OFFSET,1:N) has been +* accordingly pivoted, but no factorized. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(i) = 0, +* the i-th column of A is a free column. 
+* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* VN1 (input/output) REAL array, dimension (N) +* The vector with the partial column norms. +* +* VN2 (input/output) REAL array, dimension (N) +* The vector with the exact column norms. +* +* WORK (workspace) REAL array, dimension (N) +* +* Further Details +* =============== +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* Partial column norm updating strategy modified by +* Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +* University of Zagreb, Croatia. +* June 2006. +* For more details see LAPACK Working Note 176. +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MN, OFFPI, PVT + REAL AII, TEMP, TEMP2, TOL3Z +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, SSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SNRM2 + EXTERNAL ISAMAX, SLAMCH, SNRM2 +* .. +* .. Executable Statements .. +* + MN = MIN( M-OFFSET, N ) + TOL3Z = SQRT(SLAMCH('Epsilon')) +* +* Compute factorization. +* + DO 20 I = 1, MN +* + OFFPI = OFFSET + I +* +* Determine ith pivot column and swap if necessary. +* + PVT = ( I-1 ) + ISAMAX( N-I+1, VN1( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL SSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + VN1( PVT ) = VN1( I ) + VN2( PVT ) = VN2( I ) + END IF +* +* Generate elementary reflector H(i). 
+* + IF( OFFPI.LT.M ) THEN + CALL SLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1, + $ TAU( I ) ) + ELSE + CALL SLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i)' to A(offset+i:m,i+1:n) from the left. +* + AII = A( OFFPI, I ) + A( OFFPI, I ) = ONE + CALL SLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, + $ TAU( I ), A( OFFPI, I+1 ), LDA, WORK( 1 ) ) + A( OFFPI, I ) = AII + END IF +* +* Update partial column norms. +* + DO 10 J = I + 1, N + IF( VN1( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ONE - ( ABS( A( OFFPI, J ) ) / VN1( J ) )**2 + TEMP = MAX( TEMP, ZERO ) + TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + IF( OFFPI.LT.M ) THEN + VN1( J ) = SNRM2( M-OFFPI, A( OFFPI+1, J ), 1 ) + VN2( J ) = VN1( J ) + ELSE + VN1( J ) = ZERO + VN2( J ) = ZERO + END IF + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 10 CONTINUE +* + 20 CONTINUE +* + RETURN +* +* End of SLAQP2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaqps.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaqps.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,259 @@ + SUBROUTINE SLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1, + $ VN2, AUXV, F, LDF ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER KB, LDA, LDF, M, N, NB, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ), + $ VN1( * ), VN2( * ) +* .. +* +* Purpose +* ======= +* +* SLAQPS computes a step of QR factorization with column pivoting +* of a real M-by-N matrix A by using Blas-3. It tries to factorize +* NB columns from A starting from the row OFFSET+1, and updates all +* of the matrix with Blas-3 xGEMM. 
+* +* In some cases, due to catastrophic cancellations, it cannot +* factorize NB columns. Hence, the actual number of factorized +* columns is returned in KB. +* +* Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0 +* +* OFFSET (input) INTEGER +* The number of rows of A that have been factorized in +* previous steps. +* +* NB (input) INTEGER +* The number of columns to factorize. +* +* KB (output) INTEGER +* The number of columns actually factorized. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, block A(OFFSET+1:M,1:KB) is the triangular +* factor obtained and block A(1:OFFSET,1:N) has been +* accordingly pivoted, but no factorized. +* The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has +* been updated. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* JPVT(I) = K <==> Column K of the full matrix A has been +* permuted into position I in AP. +* +* TAU (output) REAL array, dimension (KB) +* The scalar factors of the elementary reflectors. +* +* VN1 (input/output) REAL array, dimension (N) +* The vector with the partial column norms. +* +* VN2 (input/output) REAL array, dimension (N) +* The vector with the exact column norms. +* +* AUXV (input/output) REAL array, dimension (NB) +* Auxiliar vector. +* +* F (input/output) REAL array, dimension (LDF,NB) +* Matrix F' = L*Y'*A. +* +* LDF (input) INTEGER +* The leading dimension of the array F. LDF >= max(1,N). +* +* Further Details +* =============== +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* Partial column norm updating strategy modified by +* Z. Drmac and Z. 
Bujanovic, Dept. of Mathematics, +* University of Zagreb, Croatia. +* June 2006. +* For more details see LAPACK Working Note 176. +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER ITEMP, J, K, LASTRK, LSTICC, PVT, RK + REAL AKK, TEMP, TEMP2, TOL3Z +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SGEMV, SLARFG, SSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, NINT, REAL, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SNRM2 + EXTERNAL ISAMAX, SLAMCH, SNRM2 +* .. +* .. Executable Statements .. +* + LASTRK = MIN( M, N+OFFSET ) + LSTICC = 0 + K = 0 + TOL3Z = SQRT(SLAMCH('Epsilon')) +* +* Beginning of while loop. +* + 10 CONTINUE + IF( ( K.LT.NB ) .AND. ( LSTICC.EQ.0 ) ) THEN + K = K + 1 + RK = OFFSET + K +* +* Determine ith pivot column and swap if necessary +* + PVT = ( K-1 ) + ISAMAX( N-K+1, VN1( K ), 1 ) + IF( PVT.NE.K ) THEN + CALL SSWAP( M, A( 1, PVT ), 1, A( 1, K ), 1 ) + CALL SSWAP( K-1, F( PVT, 1 ), LDF, F( K, 1 ), LDF ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( K ) + JPVT( K ) = ITEMP + VN1( PVT ) = VN1( K ) + VN2( PVT ) = VN2( K ) + END IF +* +* Apply previous Householder reflectors to column K: +* A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. +* + IF( K.GT.1 ) THEN + CALL SGEMV( 'No transpose', M-RK+1, K-1, -ONE, A( RK, 1 ), + $ LDA, F( K, 1 ), LDF, ONE, A( RK, K ), 1 ) + END IF +* +* Generate elementary reflector H(k). +* + IF( RK.LT.M ) THEN + CALL SLARFG( M-RK+1, A( RK, K ), A( RK+1, K ), 1, TAU( K ) ) + ELSE + CALL SLARFG( 1, A( RK, K ), A( RK, K ), 1, TAU( K ) ) + END IF +* + AKK = A( RK, K ) + A( RK, K ) = ONE +* +* Compute Kth column of F: +* +* Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). 
+* + IF( K.LT.N ) THEN + CALL SGEMV( 'Transpose', M-RK+1, N-K, TAU( K ), + $ A( RK, K+1 ), LDA, A( RK, K ), 1, ZERO, + $ F( K+1, K ), 1 ) + END IF +* +* Padding F(1:K,K) with zeros. +* + DO 20 J = 1, K + F( J, K ) = ZERO + 20 CONTINUE +* +* Incremental updating of F: +* F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' +* *A(RK:M,K). +* + IF( K.GT.1 ) THEN + CALL SGEMV( 'Transpose', M-RK+1, K-1, -TAU( K ), A( RK, 1 ), + $ LDA, A( RK, K ), 1, ZERO, AUXV( 1 ), 1 ) +* + CALL SGEMV( 'No transpose', N, K-1, ONE, F( 1, 1 ), LDF, + $ AUXV( 1 ), 1, ONE, F( 1, K ), 1 ) + END IF +* +* Update the current row of A: +* A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. +* + IF( K.LT.N ) THEN + CALL SGEMV( 'No transpose', N-K, K, -ONE, F( K+1, 1 ), LDF, + $ A( RK, 1 ), LDA, ONE, A( RK, K+1 ), LDA ) + END IF +* +* Update partial column norms. +* + IF( RK.LT.LASTRK ) THEN + DO 30 J = K + 1, N + IF( VN1( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ABS( A( RK, J ) ) / VN1( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + VN2( J ) = REAL( LSTICC ) + LSTICC = J + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE + END IF +* + A( RK, K ) = AKK +* +* End of while loop. +* + GO TO 10 + END IF + KB = K + RK = OFFSET + KB +* +* Apply the block reflector to the rest of the matrix: +* A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - +* A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. +* + IF( KB.LT.MIN( N, M-OFFSET ) ) THEN + CALL SGEMM( 'No transpose', 'Transpose', M-RK, N-KB, KB, -ONE, + $ A( RK+1, 1 ), LDA, F( KB+1, 1 ), LDF, ONE, + $ A( RK+1, KB+1 ), LDA ) + END IF +* +* Recomputation of difficult columns. 
+* + 40 CONTINUE + IF( LSTICC.GT.0 ) THEN + ITEMP = NINT( VN2( LSTICC ) ) + VN1( LSTICC ) = SNRM2( M-RK, A( RK+1, LSTICC ), 1 ) +* +* NOTE: The computation of VN1( LSTICC ) relies on the fact that +* SNRM2 does not fail on vectors with norm below the value of +* SQRT(DLAMCH('S')) +* + VN2( LSTICC ) = VN1( LSTICC ) + LSTICC = ITEMP + GO TO 40 + END IF +* + RETURN +* +* End of SLAQPS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaqr0.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaqr0.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,640 @@ + SUBROUTINE SLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + REAL H( LDH, * ), WI( * ), WORK( * ), WR( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SLAQR0 computes the eigenvalues of a Hessenberg matrix H +* and, optionally, the matrices T and Z from the Schur decomposition +* H = Z T Z**T, where T is an upper quasi-triangular matrix (the +* Schur form), and Z is the orthogonal matrix of Schur vectors. +* +* Optionally Z may be postmultiplied into an input orthogonal +* matrix Q so that this routine can give the Schur factorization +* of a matrix A which has been reduced to the Hessenberg form H +* by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. +* +* Arguments +* ========= +* +* WANTT (input) LOGICAL +* = .TRUE. : the full Schur form T is required; +* = .FALSE.: only eigenvalues are required. +* +* WANTZ (input) LOGICAL +* = .TRUE. : the matrix of Schur vectors Z is required; +* = .FALSE.: Schur vectors are not required. +* +* N (input) INTEGER +* The order of the matrix H. N .GE. 0. 
+* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, +* H(ILO,ILO-1) is zero. ILO and IHI are normally set by a +* previous call to SGEBAL, and then passed to SGEHRD when the +* matrix output by SGEBAL is reduced to Hessenberg form. +* Otherwise, ILO and IHI should be set to 1 and N, +* respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +* If N = 0, then ILO = 1 and IHI = 0. +* +* H (input/output) REAL array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if INFO = 0 and WANTT is .TRUE., then H contains +* the upper quasi-triangular matrix T from the Schur +* decomposition (the Schur form); 2-by-2 diagonal blocks +* (corresponding to complex conjugate pairs of eigenvalues) +* are returned in standard form, with H(i,i) = H(i+1,i+1) +* and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is +* .FALSE., then the contents of H are unspecified on exit. +* (The output value of H when INFO.GT.0 is given under the +* description of INFO below.) +* +* This subroutine may explicitly set H(i,j) = 0 for i.GT.j and +* j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH .GE. max(1,N). +* +* WR (output) REAL array, dimension (IHI) +* WI (output) REAL array, dimension (IHI) +* The real and imaginary parts, respectively, of the computed +* eigenvalues of H(ILO:IHI,ILO:IHI) are stored WR(ILO:IHI) +* and WI(ILO:IHI). If two eigenvalues are computed as a +* complex conjugate pair, they are stored in consecutive +* elements of WR and WI, say the i-th and (i+1)th, with +* WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then +* the eigenvalues are stored in the same order as on the +* diagonal of the Schur form returned in H, with +* WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal +* block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and +* WI(i+1) = -WI(i). 
+* +* ILOZ (input) INTEGER +* IHIZ (input) INTEGER +* Specify the rows of Z to which transformations must be +* applied if WANTZ is .TRUE.. +* 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. +* +* Z (input/output) REAL array, dimension (LDZ,IHI) +* If WANTZ is .FALSE., then Z is not referenced. +* If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is +* replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the +* orthogonal Schur factor of H(ILO:IHI,ILO:IHI). +* (The output value of Z when INFO.GT.0 is given under +* the description of INFO below.) +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. if WANTZ is .TRUE. +* then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. +* +* WORK (workspace/output) REAL array, dimension LWORK +* On exit, if LWORK = -1, WORK(1) returns an estimate of +* the optimal value for LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK .GE. max(1,N) +* is sufficient, but LWORK typically as large as 6*N may +* be required for optimal performance. A workspace query +* to determine the optimal workspace size is recommended. +* +* If LWORK = -1, then SLAQR0 does a workspace query. +* In this case, SLAQR0 checks the input parameters and +* estimates the optimal workspace size for the given +* values of N, ILO and IHI. The estimate is returned +* in WORK(1). No error message related to LWORK is +* issued by XERBLA. Neither H nor Z are accessed. +* +* +* INFO (output) INTEGER +* = 0: successful exit +* .GT. 0: if INFO = i, SLAQR0 failed to compute all of +* the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +* and WI contain those eigenvalues which have been +* successfully computed. (Failures are rare.) +* +* If INFO .GT. 0 and WANT is .FALSE., then on exit, +* the remaining unconverged eigenvalues are the eigen- +* values of the upper Hessenberg matrix rows and +* columns ILO through INFO of the final, output +* value of H. +* +* If INFO .GT. 
0 and WANTT is .TRUE., then on exit +* +* (*) (initial value of H)*U = U*(final value of H) +* +* where U is an orthogonal matrix. The final +* value of H is upper Hessenberg and quasi-triangular +* in rows and columns INFO+1 through IHI. +* +* If INFO .GT. 0 and WANTZ is .TRUE., then on exit +* +* (final value of Z(ILO:IHI,ILOZ:IHIZ) +* = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U +* +* where U is the orthogonal matrix in (*) (regard- +* less of the value of WANTT.) +* +* If INFO .GT. 0 and WANTZ is .FALSE., then Z is not +* accessed. +* +* ================================================================ +* Based on contributions by +* Karen Braman and Ralph Byers, Department of Mathematics, +* University of Kansas, USA +* +* ================================================================ +* References: +* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +* Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +* Performance, SIAM Journal of Matrix Analysis, volume 23, pages +* 929--947, 2002. +* +* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +* Algorithm Part II: Aggressive Early Deflation, SIAM Journal +* of Matrix Analysis, volume 23, pages 948--973, 2002. +* +* ================================================================ +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . SLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== +* +* ==== Exceptional deflation windows: try to cure rare +* . slow convergence by increasing the size of the +* . deflation window after KEXNW iterations. ===== +* +* ==== Exceptional shifts: try to cure rare slow convergence +* . with ad-hoc exceptional shifts every KEXSH iterations. +* . The constants WILK1 and WILK2 are used to form the +* . exceptional shifts. 
==== +* + INTEGER NTINY + PARAMETER ( NTINY = 11 ) + INTEGER KEXNW, KEXSH + PARAMETER ( KEXNW = 5, KEXSH = 6 ) + REAL WILK1, WILK2 + PARAMETER ( WILK1 = 0.75e0, WILK2 = -0.4375e0 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) +* .. +* .. Local Scalars .. + REAL AA, BB, CC, CS, DD, SN, SS, SWAP + INTEGER I, INF, IT, ITMAX, K, KACC22, KBOT, KDU, KS, + $ KT, KTOP, KU, KV, KWH, KWTOP, KWV, LD, LS, + $ LWKOPT, NDFL, NH, NHO, NIBBLE, NMIN, NS, NSMAX, + $ NSR, NVE, NW, NWMAX, NWR + LOGICAL NWINC, SORTED + CHARACTER JBCMPZ*2 +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Local Arrays .. + REAL ZDUM( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLAHQR, SLANV2, SLAQR3, SLAQR4, SLAQR5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, MAX, MIN, MOD, REAL +* .. +* .. Executable Statements .. + INFO = 0 +* +* ==== Quick return for N = 0: nothing to do. ==== +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = ONE + RETURN + END IF +* +* ==== Set up job flags for ILAENV. ==== +* + IF( WANTT ) THEN + JBCMPZ( 1: 1 ) = 'S' + ELSE + JBCMPZ( 1: 1 ) = 'E' + END IF + IF( WANTZ ) THEN + JBCMPZ( 2: 2 ) = 'V' + ELSE + JBCMPZ( 2: 2 ) = 'N' + END IF +* +* ==== Tiny matrices must use SLAHQR. ==== +* + IF( N.LE.NTINY ) THEN +* +* ==== Estimate optimal workspace. ==== +* + LWKOPT = 1 + IF( LWORK.NE.-1 ) + $ CALL SLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, INFO ) + ELSE +* +* ==== Use small bulge multi-shift QR with aggressive early +* . deflation on larger-than-tiny matrices. ==== +* +* ==== Hope for the best. ==== +* + INFO = 0 +* +* ==== NWR = recommended deflation window size. At this +* . point, N .GT. NTINY = 11, so there is enough +* . subdiagonal workspace for NWR.GE.2 as required. +* . (In fact, there is enough subdiagonal space for +* . NWR.GE.3.) 
==== +* + NWR = ILAENV( 13, 'SLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NWR = MAX( 2, NWR ) + NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR ) + NW = NWR +* +* ==== NSR = recommended number of simultaneous shifts. +* . At this point N .GT. NTINY = 11, so there is at +* . enough subdiagonal workspace for NSR to be even +* . and greater than or equal to two as required. ==== +* + NSR = ILAENV( 15, 'SLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NSR = MIN( NSR, ( N+6 ) / 9, IHI-ILO ) + NSR = MAX( 2, NSR-MOD( NSR, 2 ) ) +* +* ==== Estimate optimal workspace ==== +* +* ==== Workspace query call to SLAQR3 ==== +* + CALL SLAQR3( WANTT, WANTZ, N, ILO, IHI, NWR+1, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, WR, WI, H, LDH, N, H, LDH, + $ N, H, LDH, WORK, -1 ) +* +* ==== Optimal workspace = MAX(SLAQR5, SLAQR3) ==== +* + LWKOPT = MAX( 3*NSR / 2, INT( WORK( 1 ) ) ) +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = REAL( LWKOPT ) + RETURN + END IF +* +* ==== SLAHQR/SLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'SLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== Nibble crossover point ==== +* + NIBBLE = ILAENV( 14, 'SLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NIBBLE = MAX( 0, NIBBLE ) +* +* ==== Accumulate reflections during ttswp? Use block +* . 2-by-2 structure during matrix-matrix multiply? ==== +* + KACC22 = ILAENV( 16, 'SLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + KACC22 = MAX( 0, KACC22 ) + KACC22 = MIN( 2, KACC22 ) +* +* ==== NWMAX = the largest possible deflation window for +* . which there is sufficient workspace. ==== +* + NWMAX = MIN( ( N-1 ) / 3, LWORK / 2 ) +* +* ==== NSMAX = the Largest number of simultaneous shifts +* . for which there is sufficient workspace. ==== +* + NSMAX = MIN( ( N+6 ) / 9, 2*LWORK / 3 ) + NSMAX = NSMAX - MOD( NSMAX, 2 ) +* +* ==== NDFL: an iteration count restarted at deflation. 
==== +* + NDFL = 1 +* +* ==== ITMAX = iteration limit ==== +* + ITMAX = MAX( 30, 2*KEXSH )*MAX( 10, ( IHI-ILO+1 ) ) +* +* ==== Last row and column in the active block ==== +* + KBOT = IHI +* +* ==== Main Loop ==== +* + DO 80 IT = 1, ITMAX +* +* ==== Done when KBOT falls below ILO ==== +* + IF( KBOT.LT.ILO ) + $ GO TO 90 +* +* ==== Locate active block ==== +* + DO 10 K = KBOT, ILO + 1, -1 + IF( H( K, K-1 ).EQ.ZERO ) + $ GO TO 20 + 10 CONTINUE + K = ILO + 20 CONTINUE + KTOP = K +* +* ==== Select deflation window size ==== +* + NH = KBOT - KTOP + 1 + IF( NDFL.LT.KEXNW .OR. NH.LT.NW ) THEN +* +* ==== Typical deflation window. If possible and +* . advisable, nibble the entire active block. +* . If not, use size NWR or NWR+1 depending upon +* . which has the smaller corresponding subdiagonal +* . entry (a heuristic). ==== +* + NWINC = .TRUE. + IF( NH.LE.MIN( NMIN, NWMAX ) ) THEN + NW = NH + ELSE + NW = MIN( NWR, NH, NWMAX ) + IF( NW.LT.NWMAX ) THEN + IF( NW.GE.NH-1 ) THEN + NW = NH + ELSE + KWTOP = KBOT - NW + 1 + IF( ABS( H( KWTOP, KWTOP-1 ) ).GT. + $ ABS( H( KWTOP-1, KWTOP-2 ) ) )NW = NW + 1 + END IF + END IF + END IF + ELSE +* +* ==== Exceptional deflation window. If there have +* . been no deflations in KEXNW or more iterations, +* . then vary the deflation window size. At first, +* . because, larger windows are, in general, more +* . powerful than smaller ones, rapidly increase the +* . window up to the maximum reasonable and possible. +* . Then maybe try a slightly smaller window. ==== +* + IF( NWINC .AND. NW.LT.MIN( NWMAX, NH ) ) THEN + NW = MIN( NWMAX, NH, 2*NW ) + ELSE + NWINC = .FALSE. + IF( NW.EQ.NH .AND. NH.GT.2 ) + $ NW = NH - 1 + END IF + END IF +* +* ==== Aggressive early deflation: +* . split workspace under the subdiagonal into +* . - an nw-by-nw work array V in the lower +* . left-hand-corner, +* . - an NW-by-at-least-NW-but-more-is-better +* . (NW-by-NHO) horizontal work array along +* . the bottom edge, +* . 
- an at-least-NW-but-more-is-better (NHV-by-NW) +* . vertical work array along the left-hand-edge. +* . ==== +* + KV = N - NW + 1 + KT = NW + 1 + NHO = ( N-NW-1 ) - KT + 1 + KWV = NW + 2 + NVE = ( N-NW ) - KWV + 1 +* +* ==== Aggressive early deflation ==== +* + CALL SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, WR, WI, H( KV, 1 ), LDH, + $ NHO, H( KV, KT ), LDH, NVE, H( KWV, 1 ), LDH, + $ WORK, LWORK ) +* +* ==== Adjust KBOT accounting for new deflations. ==== +* + KBOT = KBOT - LD +* +* ==== KS points to the shifts. ==== +* + KS = KBOT - LS + 1 +* +* ==== Skip an expensive QR sweep if there is a (partly +* . heuristic) reason to expect that many eigenvalues +* . will deflate without it. Here, the QR sweep is +* . skipped if many eigenvalues have just been deflated +* . or if the remaining active block is small. +* + IF( ( LD.EQ.0 ) .OR. ( ( 100*LD.LE.NW*NIBBLE ) .AND. ( KBOT- + $ KTOP+1.GT.MIN( NMIN, NWMAX ) ) ) ) THEN +* +* ==== NS = nominal number of simultaneous shifts. +* . This may be lowered (slightly) if SLAQR3 +* . did not provide that many shifts. ==== +* + NS = MIN( NSMAX, NSR, MAX( 2, KBOT-KTOP ) ) + NS = NS - MOD( NS, 2 ) +* +* ==== If there have been no deflations +* . in a multiple of KEXSH iterations, +* . then try exceptional shifts. +* . Otherwise use shifts provided by +* . SLAQR3 above or from the eigenvalues +* . of a trailing principal submatrix. ==== +* + IF( MOD( NDFL, KEXSH ).EQ.0 ) THEN + KS = KBOT - NS + 1 + DO 30 I = KBOT, MAX( KS+1, KTOP+2 ), -2 + SS = ABS( H( I, I-1 ) ) + ABS( H( I-1, I-2 ) ) + AA = WILK1*SS + H( I, I ) + BB = SS + CC = WILK2*SS + DD = AA + CALL SLANV2( AA, BB, CC, DD, WR( I-1 ), WI( I-1 ), + $ WR( I ), WI( I ), CS, SN ) + 30 CONTINUE + IF( KS.EQ.KTOP ) THEN + WR( KS+1 ) = H( KS+1, KS+1 ) + WI( KS+1 ) = ZERO + WR( KS ) = WR( KS+1 ) + WI( KS ) = WI( KS+1 ) + END IF + ELSE +* +* ==== Got NS/2 or fewer shifts? Use SLAQR4 or +* . SLAHQR on a trailing principal submatrix to +* . get more. 
(Since NS.LE.NSMAX.LE.(N+6)/9, +* . there is enough space below the subdiagonal +* . to fit an NS-by-NS scratch array.) ==== +* + IF( KBOT-KS+1.LE.NS / 2 ) THEN + KS = KBOT - NS + 1 + KT = N - NS + 1 + CALL SLACPY( 'A', NS, NS, H( KS, KS ), LDH, + $ H( KT, 1 ), LDH ) + IF( NS.GT.NMIN ) THEN + CALL SLAQR4( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, WR( KS ), + $ WI( KS ), 1, 1, ZDUM, 1, WORK, + $ LWORK, INF ) + ELSE + CALL SLAHQR( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, WR( KS ), + $ WI( KS ), 1, 1, ZDUM, 1, INF ) + END IF + KS = KS + INF +* +* ==== In case of a rare QR failure use +* . eigenvalues of the trailing 2-by-2 +* . principal submatrix. ==== +* + IF( KS.GE.KBOT ) THEN + AA = H( KBOT-1, KBOT-1 ) + CC = H( KBOT, KBOT-1 ) + BB = H( KBOT-1, KBOT ) + DD = H( KBOT, KBOT ) + CALL SLANV2( AA, BB, CC, DD, WR( KBOT-1 ), + $ WI( KBOT-1 ), WR( KBOT ), + $ WI( KBOT ), CS, SN ) + KS = KBOT - 1 + END IF + END IF +* + IF( KBOT-KS+1.GT.NS ) THEN +* +* ==== Sort the shifts (Helps a little) +* . Bubble sort keeps complex conjugate +* . pairs together. ==== +* + SORTED = .false. + DO 50 K = KBOT, KS + 1, -1 + IF( SORTED ) + $ GO TO 60 + SORTED = .true. + DO 40 I = KS, K - 1 + IF( ABS( WR( I ) )+ABS( WI( I ) ).LT. + $ ABS( WR( I+1 ) )+ABS( WI( I+1 ) ) ) THEN + SORTED = .false. +* + SWAP = WR( I ) + WR( I ) = WR( I+1 ) + WR( I+1 ) = SWAP +* + SWAP = WI( I ) + WI( I ) = WI( I+1 ) + WI( I+1 ) = SWAP + END IF + 40 CONTINUE + 50 CONTINUE + 60 CONTINUE + END IF +* +* ==== Shuffle shifts into pairs of real shifts +* . and pairs of complex conjugate shifts +* . assuming complex conjugate shifts are +* . already adjacent to one another. (Yes, +* . they are.) 
==== +* + DO 70 I = KBOT, KS + 2, -2 + IF( WI( I ).NE.-WI( I-1 ) ) THEN +* + SWAP = WR( I ) + WR( I ) = WR( I-1 ) + WR( I-1 ) = WR( I-2 ) + WR( I-2 ) = SWAP +* + SWAP = WI( I ) + WI( I ) = WI( I-1 ) + WI( I-1 ) = WI( I-2 ) + WI( I-2 ) = SWAP + END IF + 70 CONTINUE + END IF +* +* ==== If there are only two shifts and both are +* . real, then use only one. ==== +* + IF( KBOT-KS+1.EQ.2 ) THEN + IF( WI( KBOT ).EQ.ZERO ) THEN + IF( ABS( WR( KBOT )-H( KBOT, KBOT ) ).LT. + $ ABS( WR( KBOT-1 )-H( KBOT, KBOT ) ) ) THEN + WR( KBOT-1 ) = WR( KBOT ) + ELSE + WR( KBOT ) = WR( KBOT-1 ) + END IF + END IF + END IF +* +* ==== Use up to NS of the the smallest magnatiude +* . shifts. If there aren't NS shifts available, +* . then use them all, possibly dropping one to +* . make the number of shifts even. ==== +* + NS = MIN( NS, KBOT-KS+1 ) + NS = NS - MOD( NS, 2 ) + KS = KBOT - NS + 1 +* +* ==== Small-bulge multi-shift QR sweep: +* . split workspace under the subdiagonal into +* . - a KDU-by-KDU work array U in the lower +* . left-hand-corner, +* . - a KDU-by-at-least-KDU-but-more-is-better +* . (KDU-by-NHo) horizontal work array WH along +* . the bottom edge, +* . - and an at-least-KDU-but-more-is-better-by-KDU +* . (NVE-by-KDU) vertical work WV arrow along +* . the left-hand-edge. ==== +* + KDU = 3*NS - 3 + KU = N - KDU + 1 + KWH = KDU + 1 + NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1 + KWV = KDU + 4 + NVE = N - KDU - KWV + 1 +* +* ==== Small-bulge multi-shift QR sweep ==== +* + CALL SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NS, + $ WR( KS ), WI( KS ), H, LDH, ILOZ, IHIZ, Z, + $ LDZ, WORK, 3, H( KU, 1 ), LDH, NVE, + $ H( KWV, 1 ), LDH, NHO, H( KU, KWH ), LDH ) + END IF +* +* ==== Note progress (or the lack of it). ==== +* + IF( LD.GT.0 ) THEN + NDFL = 1 + ELSE + NDFL = NDFL + 1 + END IF +* +* ==== End of main loop ==== + 80 CONTINUE +* +* ==== Iteration limit exceeded. Set INFO to show where +* . the problem occurred and exit. 
==== +* + INFO = KBOT + 90 CONTINUE + END IF +* +* ==== Return the optimal value of LWORK. ==== +* + WORK( 1 ) = REAL( LWKOPT ) +* +* ==== End of SLAQR0 ==== +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaqr1.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaqr1.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,97 @@ + SUBROUTINE SLAQR1( N, H, LDH, SR1, SI1, SR2, SI2, V ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + REAL SI1, SI2, SR1, SR2 + INTEGER LDH, N +* .. +* .. Array Arguments .. + REAL H( LDH, * ), V( * ) +* .. +* +* Given a 2-by-2 or 3-by-3 matrix H, SLAQR1 sets v to a +* scalar multiple of the first column of the product +* +* (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) +* +* scaling to avoid overflows and most underflows. It +* is assumed that either +* +* 1) sr1 = sr2 and si1 = -si2 +* or +* 2) si1 = si2 = 0. +* +* This is useful for starting double implicit shift bulges +* in the QR algorithm. +* +* +* N (input) integer +* Order of the matrix H. N must be either 2 or 3. +* +* H (input) REAL array of dimension (LDH,N) +* The 2-by-2 or 3-by-3 matrix H in (*). +* +* LDH (input) integer +* The leading dimension of H as declared in +* the calling procedure. LDH.GE.N +* +* SR1 (input) REAL +* SI1 The shifts in (*). +* SR2 +* SI2 +* +* V (output) REAL array of dimension N +* A scalar multiple of the first column of the +* matrix K in (*). +* +* ================================================================ +* Based on contributions by +* Karen Braman and Ralph Byers, Department of Mathematics, +* University of Kansas, USA +* +* ================================================================ +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0e0 ) +* .. +* .. Local Scalars .. + REAL H21S, H31S, S +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. 
+ IF( N.EQ.2 ) THEN + S = ABS( H( 1, 1 )-SR2 ) + ABS( SI2 ) + ABS( H( 2, 1 ) ) + IF( S.EQ.ZERO ) THEN + V( 1 ) = ZERO + V( 2 ) = ZERO + ELSE + H21S = H( 2, 1 ) / S + V( 1 ) = H21S*H( 1, 2 ) + ( H( 1, 1 )-SR1 )* + $ ( ( H( 1, 1 )-SR2 ) / S ) - SI1*( SI2 / S ) + V( 2 ) = H21S*( H( 1, 1 )+H( 2, 2 )-SR1-SR2 ) + END IF + ELSE + S = ABS( H( 1, 1 )-SR2 ) + ABS( SI2 ) + ABS( H( 2, 1 ) ) + + $ ABS( H( 3, 1 ) ) + IF( S.EQ.ZERO ) THEN + V( 1 ) = ZERO + V( 2 ) = ZERO + V( 3 ) = ZERO + ELSE + H21S = H( 2, 1 ) / S + H31S = H( 3, 1 ) / S + V( 1 ) = ( H( 1, 1 )-SR1 )*( ( H( 1, 1 )-SR2 ) / S ) - + $ SI1*( SI2 / S ) + H( 1, 2 )*H21S + H( 1, 3 )*H31S + V( 2 ) = H21S*( H( 1, 1 )+H( 2, 2 )-SR1-SR2 ) + + $ H( 2, 3 )*H31S + V( 3 ) = H31S*( H( 1, 1 )+H( 3, 3 )-SR1-SR2 ) + + $ H21S*H( 3, 2 ) + END IF + END IF + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaqr2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaqr2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,551 @@ + SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, + $ LDT, NV, WV, LDWV, WORK, LWORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, + $ LDZ, LWORK, N, ND, NH, NS, NV, NW + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + REAL H( LDH, * ), SI( * ), SR( * ), T( LDT, * ), + $ V( LDV, * ), WORK( * ), WV( LDWV, * ), + $ Z( LDZ, * ) +* .. +* +* This subroutine is identical to SLAQR3 except that it avoids +* recursion by calling SLAHQR instead of SLAQR4. 
+* +* +* ****************************************************************** +* Aggressive early deflation: +* +* This subroutine accepts as input an upper Hessenberg matrix +* H and performs an orthogonal similarity transformation +* designed to detect and deflate fully converged eigenvalues from +* a trailing principal submatrix. On output H has been over- +* written by a new Hessenberg matrix that is a perturbation of +* an orthogonal similarity transformation of H. It is to be +* hoped that the final version of H has many zero subdiagonal +* entries. +* +* ****************************************************************** +* WANTT (input) LOGICAL +* If .TRUE., then the Hessenberg matrix H is fully updated +* so that the quasi-triangular Schur factor may be +* computed (in cooperation with the calling subroutine). +* If .FALSE., then only enough of H is updated to preserve +* the eigenvalues. +* +* WANTZ (input) LOGICAL +* If .TRUE., then the orthogonal matrix Z is updated so +* so that the orthogonal Schur factor may be computed +* (in cooperation with the calling subroutine). +* If .FALSE., then Z is not referenced. +* +* N (input) INTEGER +* The order of the matrix H and (if WANTZ is .TRUE.) the +* order of the orthogonal matrix Z. +* +* KTOP (input) INTEGER +* It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. +* KBOT and KTOP together determine an isolated block +* along the diagonal of the Hessenberg matrix. +* +* KBOT (input) INTEGER +* It is assumed without a check that either +* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together +* determine an isolated block along the diagonal of the +* Hessenberg matrix. +* +* NW (input) INTEGER +* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). +* +* H (input/output) REAL array, dimension (LDH,N) +* On input the initial N-by-N section of H stores the +* Hessenberg matrix undergoing aggressive early deflation. 
+* On output H has been transformed by an orthogonal +* similarity transformation, perturbed, and the returned +* to Hessenberg form that (it is to be hoped) has some +* zero subdiagonal entries. +* +* LDH (input) integer +* Leading dimension of H just as declared in the calling +* subroutine. N .LE. LDH +* +* ILOZ (input) INTEGER +* IHIZ (input) INTEGER +* Specify the rows of Z to which transformations must be +* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. +* +* Z (input/output) REAL array, dimension (LDZ,IHI) +* IF WANTZ is .TRUE., then on output, the orthogonal +* similarity transformation mentioned above has been +* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. +* If WANTZ is .FALSE., then Z is unreferenced. +* +* LDZ (input) integer +* The leading dimension of Z just as declared in the +* calling subroutine. 1 .LE. LDZ. +* +* NS (output) integer +* The number of unconverged (ie approximate) eigenvalues +* returned in SR and SI that may be used as shifts by the +* calling subroutine. +* +* ND (output) integer +* The number of converged eigenvalues uncovered by this +* subroutine. +* +* SR (output) REAL array, dimension KBOT +* SI (output) REAL array, dimension KBOT +* On output, the real and imaginary parts of approximate +* eigenvalues that may be used for shifts are stored in +* SR(KBOT-ND-NS+1) through SR(KBOT-ND) and +* SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. +* The real and imaginary parts of converged eigenvalues +* are stored in SR(KBOT-ND+1) through SR(KBOT) and +* SI(KBOT-ND+1) through SI(KBOT), respectively. +* +* V (workspace) REAL array, dimension (LDV,NW) +* An NW-by-NW work array. +* +* LDV (input) integer scalar +* The leading dimension of V just as declared in the +* calling subroutine. NW .LE. LDV +* +* NH (input) integer scalar +* The number of columns of T. NH.GE.NW. 
+* +* T (workspace) REAL array, dimension (LDT,NW) +* +* LDT (input) integer +* The leading dimension of T just as declared in the +* calling subroutine. NW .LE. LDT +* +* NV (input) integer +* The number of rows of work array WV available for +* workspace. NV.GE.NW. +* +* WV (workspace) REAL array, dimension (LDWV,NW) +* +* LDWV (input) integer +* The leading dimension of W just as declared in the +* calling subroutine. NW .LE. LDV +* +* WORK (workspace) REAL array, dimension LWORK. +* On exit, WORK(1) is set to an estimate of the optimal value +* of LWORK for the given values of N, NW, KTOP and KBOT. +* +* LWORK (input) integer +* The dimension of the work array WORK. LWORK = 2*NW +* suffices, but greater efficiency may result from larger +* values of LWORK. +* +* If LWORK = -1, then a workspace query is assumed; SLAQR2 +* only estimates the optimal workspace size for the given +* values of N, NW, KTOP and KBOT. The estimate is returned +* in WORK(1). No error message related to LWORK is issued +* by XERBLA. Neither H nor Z are accessed. +* +* ================================================================ +* Based on contributions by +* Karen Braman and Ralph Byers, Department of Mathematics, +* University of Kansas, USA +* +* ================================================================== +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) +* .. +* .. Local Scalars .. + REAL AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S, + $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP + INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL, + $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, + $ LWKOPT + LOGICAL BULGE, SORTED +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEHRD, SGEMM, SLABAD, SLACPY, SLAHQR, + $ SLANV2, SLARF, SLARFG, SLASET, SORGHR, STREXC +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. 
+* +* ==== Estimate optimal workspace. ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + IF( JW.LE.2 ) THEN + LWKOPT = 1 + ELSE +* +* ==== Workspace query call to SGEHRD ==== +* + CALL SGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK1 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to SORGHR ==== +* + CALL SORGHR( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK2 = INT( WORK( 1 ) ) +* +* ==== Optimal workspace ==== +* + LWKOPT = JW + MAX( LWK1, LWK2 ) + END IF +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = REAL( LWKOPT ) + RETURN + END IF +* +* ==== Nothing to do ... +* ... for an empty active block ... ==== + NS = 0 + ND = 0 + IF( KTOP.GT.KBOT ) + $ RETURN +* ... nor for an empty deflation window. ==== + IF( NW.LT.1 ) + $ RETURN +* +* ==== Machine constants ==== +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = ONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( N ) / ULP ) +* +* ==== Setup deflation window ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + KWTOP = KBOT - JW + 1 + IF( KWTOP.EQ.KTOP ) THEN + S = ZERO + ELSE + S = H( KWTOP, KWTOP-1 ) + END IF +* + IF( KBOT.EQ.KWTOP ) THEN +* +* ==== 1-by-1 deflation window: not much to do ==== +* + SR( KWTOP ) = H( KWTOP, KWTOP ) + SI( KWTOP ) = ZERO + NS = 1 + ND = 0 + IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) ) + $ THEN + NS = 0 + ND = 1 + IF( KWTOP.GT.KTOP ) + $ H( KWTOP, KWTOP-1 ) = ZERO + END IF + RETURN + END IF +* +* ==== Convert to spike-triangular form. (In case of a +* . rare QR failure, this routine continues to do +* . aggressive early deflation using that part of +* . the deflation window that converged using INFQR +* . here and there to keep track.) 
==== +* + CALL SLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) + CALL SCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) +* + CALL SLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) + CALL SLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ), + $ SI( KWTOP ), 1, JW, V, LDV, INFQR ) +* +* ==== STREXC needs a clean margin near the diagonal ==== +* + DO 10 J = 1, JW - 3 + T( J+2, J ) = ZERO + T( J+3, J ) = ZERO + 10 CONTINUE + IF( JW.GT.2 ) + $ T( JW, JW-2 ) = ZERO +* +* ==== Deflation detection loop ==== +* + NS = JW + ILST = INFQR + 1 + 20 CONTINUE + IF( ILST.LE.NS ) THEN + IF( NS.EQ.1 ) THEN + BULGE = .FALSE. + ELSE + BULGE = T( NS, NS-1 ).NE.ZERO + END IF +* +* ==== Small spike tip test for deflation ==== +* + IF( .NOT.BULGE ) THEN +* +* ==== Real eigenvalue ==== +* + FOO = ABS( T( NS, NS ) ) + IF( FOO.EQ.ZERO ) + $ FOO = ABS( S ) + IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN +* +* ==== Deflatable ==== +* + NS = NS - 1 + ELSE +* +* ==== Undeflatable. Move it up out of the way. +* . (STREXC can not fail in this case.) ==== +* + IFST = NS + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + ILST = ILST + 1 + END IF + ELSE +* +* ==== Complex conjugate pair ==== +* + FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )* + $ SQRT( ABS( T( NS-1, NS ) ) ) + IF( FOO.EQ.ZERO ) + $ FOO = ABS( S ) + IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE. + $ MAX( SMLNUM, ULP*FOO ) ) THEN +* +* ==== Deflatable ==== +* + NS = NS - 2 + ELSE +* +* ==== Undflatable. Move them up out of the way. +* . Fortunately, STREXC does the right thing with +* . ILST in case of a rare exchange failure. 
==== +* + IFST = NS + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + ILST = ILST + 2 + END IF + END IF +* +* ==== End deflation detection loop ==== +* + GO TO 20 + END IF +* +* ==== Return to Hessenberg form ==== +* + IF( NS.EQ.0 ) + $ S = ZERO +* + IF( NS.LT.JW ) THEN +* +* ==== sorting diagonal blocks of T improves accuracy for +* . graded matrices. Bubble sort deals well with +* . exchange failures. ==== +* + SORTED = .false. + I = NS + 1 + 30 CONTINUE + IF( SORTED ) + $ GO TO 50 + SORTED = .true. +* + KEND = I - 1 + I = INFQR + 1 + IF( I.EQ.NS ) THEN + K = I + 1 + ELSE IF( T( I+1, I ).EQ.ZERO ) THEN + K = I + 1 + ELSE + K = I + 2 + END IF + 40 CONTINUE + IF( K.LE.KEND ) THEN + IF( K.EQ.I+1 ) THEN + EVI = ABS( T( I, I ) ) + ELSE + EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )* + $ SQRT( ABS( T( I, I+1 ) ) ) + END IF +* + IF( K.EQ.KEND ) THEN + EVK = ABS( T( K, K ) ) + ELSE IF( T( K+1, K ).EQ.ZERO ) THEN + EVK = ABS( T( K, K ) ) + ELSE + EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )* + $ SQRT( ABS( T( K, K+1 ) ) ) + END IF +* + IF( EVI.GE.EVK ) THEN + I = K + ELSE + SORTED = .false. 
+ IFST = I + ILST = K + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + IF( INFO.EQ.0 ) THEN + I = ILST + ELSE + I = K + END IF + END IF + IF( I.EQ.KEND ) THEN + K = I + 1 + ELSE IF( T( I+1, I ).EQ.ZERO ) THEN + K = I + 1 + ELSE + K = I + 2 + END IF + GO TO 40 + END IF + GO TO 30 + 50 CONTINUE + END IF +* +* ==== Restore shift/eigenvalue array from T ==== +* + I = JW + 60 CONTINUE + IF( I.GE.INFQR+1 ) THEN + IF( I.EQ.INFQR+1 ) THEN + SR( KWTOP+I-1 ) = T( I, I ) + SI( KWTOP+I-1 ) = ZERO + I = I - 1 + ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN + SR( KWTOP+I-1 ) = T( I, I ) + SI( KWTOP+I-1 ) = ZERO + I = I - 1 + ELSE + AA = T( I-1, I-1 ) + CC = T( I, I-1 ) + BB = T( I-1, I ) + DD = T( I, I ) + CALL SLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ), + $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ), + $ SI( KWTOP+I-1 ), CS, SN ) + I = I - 2 + END IF + GO TO 60 + END IF +* + IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN + IF( NS.GT.1 .AND. S.NE.ZERO ) THEN +* +* ==== Reflect spike back into lower triangle ==== +* + CALL SCOPY( NS, V, LDV, WORK, 1 ) + BETA = WORK( 1 ) + CALL SLARFG( NS, BETA, WORK( 2 ), 1, TAU ) + WORK( 1 ) = ONE +* + CALL SLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) +* + CALL SLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL SLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL SLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) +* + CALL SGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + END IF +* +* ==== Copy updated reduced window into place ==== +* + IF( KWTOP.GT.1 ) + $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 ) + CALL SLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) + CALL SCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), + $ LDH+1 ) +* +* ==== Accumulate orthogonal matrix in order update +* . H and Z, if requested. (A modified version +* . of SORGHR that accumulates block Householder +* . transformations into V directly might be +* . marginally more efficient than the following.) 
==== +* + IF( NS.GT.1 .AND. S.NE.ZERO ) THEN + CALL SORGHR( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + CALL SGEMM( 'N', 'N', JW, NS, NS, ONE, V, LDV, T, LDT, ZERO, + $ WV, LDWV ) + CALL SLACPY( 'A', JW, NS, WV, LDWV, V, LDV ) + END IF +* +* ==== Update vertical slab in H ==== +* + IF( WANTT ) THEN + LTOP = 1 + ELSE + LTOP = KTOP + END IF + DO 70 KROW = LTOP, KWTOP - 1, NV + KLN = MIN( NV, KWTOP-KROW ) + CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), + $ LDH, V, LDV, ZERO, WV, LDWV ) + CALL SLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + 70 CONTINUE +* +* ==== Update horizontal slab in H ==== +* + IF( WANTT ) THEN + DO 80 KCOL = KBOT + 1, N, NH + KLN = MIN( NH, N-KCOL+1 ) + CALL SGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, + $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) + CALL SLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), + $ LDH ) + 80 CONTINUE + END IF +* +* ==== Update vertical slab in Z ==== +* + IF( WANTZ ) THEN + DO 90 KROW = ILOZ, IHIZ, NV + KLN = MIN( NV, IHIZ-KROW+1 ) + CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + $ LDZ, V, LDV, ZERO, WV, LDWV ) + CALL SLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), + $ LDZ ) + 90 CONTINUE + END IF + END IF +* +* ==== Return the number of deflations ... ==== +* + ND = JW - NS +* +* ==== ... and the number of shifts. (Subtracting +* . INFQR from the spike length takes care +* . of the case of a rare QR failure while +* . calculating eigenvalues of the deflation +* . window.) ==== +* + NS = NS - INFQR +* +* ==== Return optimal workspace. 
==== +* + WORK( 1 ) = REAL( LWKOPT ) +* +* ==== End of SLAQR2 ==== +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaqr3.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaqr3.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,561 @@ + SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, + $ LDT, NV, WV, LDWV, WORK, LWORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, + $ LDZ, LWORK, N, ND, NH, NS, NV, NW + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + REAL H( LDH, * ), SI( * ), SR( * ), T( LDT, * ), + $ V( LDV, * ), WORK( * ), WV( LDWV, * ), + $ Z( LDZ, * ) +* .. +* +* ****************************************************************** +* Aggressive early deflation: +* +* This subroutine accepts as input an upper Hessenberg matrix +* H and performs an orthogonal similarity transformation +* designed to detect and deflate fully converged eigenvalues from +* a trailing principal submatrix. On output H has been over- +* written by a new Hessenberg matrix that is a perturbation of +* an orthogonal similarity transformation of H. It is to be +* hoped that the final version of H has many zero subdiagonal +* entries. +* +* ****************************************************************** +* WANTT (input) LOGICAL +* If .TRUE., then the Hessenberg matrix H is fully updated +* so that the quasi-triangular Schur factor may be +* computed (in cooperation with the calling subroutine). +* If .FALSE., then only enough of H is updated to preserve +* the eigenvalues. +* +* WANTZ (input) LOGICAL +* If .TRUE., then the orthogonal matrix Z is updated so +* so that the orthogonal Schur factor may be computed +* (in cooperation with the calling subroutine). +* If .FALSE., then Z is not referenced. 
+* +* N (input) INTEGER +* The order of the matrix H and (if WANTZ is .TRUE.) the +* order of the orthogonal matrix Z. +* +* KTOP (input) INTEGER +* It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. +* KBOT and KTOP together determine an isolated block +* along the diagonal of the Hessenberg matrix. +* +* KBOT (input) INTEGER +* It is assumed without a check that either +* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together +* determine an isolated block along the diagonal of the +* Hessenberg matrix. +* +* NW (input) INTEGER +* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). +* +* H (input/output) REAL array, dimension (LDH,N) +* On input the initial N-by-N section of H stores the +* Hessenberg matrix undergoing aggressive early deflation. +* On output H has been transformed by an orthogonal +* similarity transformation, perturbed, and the returned +* to Hessenberg form that (it is to be hoped) has some +* zero subdiagonal entries. +* +* LDH (input) integer +* Leading dimension of H just as declared in the calling +* subroutine. N .LE. LDH +* +* ILOZ (input) INTEGER +* IHIZ (input) INTEGER +* Specify the rows of Z to which transformations must be +* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. +* +* Z (input/output) REAL array, dimension (LDZ,IHI) +* IF WANTZ is .TRUE., then on output, the orthogonal +* similarity transformation mentioned above has been +* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. +* If WANTZ is .FALSE., then Z is unreferenced. +* +* LDZ (input) integer +* The leading dimension of Z just as declared in the +* calling subroutine. 1 .LE. LDZ. +* +* NS (output) integer +* The number of unconverged (ie approximate) eigenvalues +* returned in SR and SI that may be used as shifts by the +* calling subroutine. +* +* ND (output) integer +* The number of converged eigenvalues uncovered by this +* subroutine. 
+* +* SR (output) REAL array, dimension KBOT +* SI (output) REAL array, dimension KBOT +* On output, the real and imaginary parts of approximate +* eigenvalues that may be used for shifts are stored in +* SR(KBOT-ND-NS+1) through SR(KBOT-ND) and +* SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. +* The real and imaginary parts of converged eigenvalues +* are stored in SR(KBOT-ND+1) through SR(KBOT) and +* SI(KBOT-ND+1) through SI(KBOT), respectively. +* +* V (workspace) REAL array, dimension (LDV,NW) +* An NW-by-NW work array. +* +* LDV (input) integer scalar +* The leading dimension of V just as declared in the +* calling subroutine. NW .LE. LDV +* +* NH (input) integer scalar +* The number of columns of T. NH.GE.NW. +* +* T (workspace) REAL array, dimension (LDT,NW) +* +* LDT (input) integer +* The leading dimension of T just as declared in the +* calling subroutine. NW .LE. LDT +* +* NV (input) integer +* The number of rows of work array WV available for +* workspace. NV.GE.NW. +* +* WV (workspace) REAL array, dimension (LDWV,NW) +* +* LDWV (input) integer +* The leading dimension of W just as declared in the +* calling subroutine. NW .LE. LDV +* +* WORK (workspace) REAL array, dimension LWORK. +* On exit, WORK(1) is set to an estimate of the optimal value +* of LWORK for the given values of N, NW, KTOP and KBOT. +* +* LWORK (input) integer +* The dimension of the work array WORK. LWORK = 2*NW +* suffices, but greater efficiency may result from larger +* values of LWORK. +* +* If LWORK = -1, then a workspace query is assumed; SLAQR3 +* only estimates the optimal workspace size for the given +* values of N, NW, KTOP and KBOT. The estimate is returned +* in WORK(1). No error message related to LWORK is issued +* by XERBLA. Neither H nor Z are accessed. 
+* +* ================================================================ +* Based on contributions by +* Karen Braman and Ralph Byers, Department of Mathematics, +* University of Kansas, USA +* +* ================================================================== +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) +* .. +* .. Local Scalars .. + REAL AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S, + $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP + INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL, + $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3, + $ LWKOPT, NMIN + LOGICAL BULGE, SORTED +* .. +* .. External Functions .. + REAL SLAMCH + INTEGER ILAENV + EXTERNAL SLAMCH, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEHRD, SGEMM, SLABAD, SLACPY, SLAHQR, + $ SLANV2, SLAQR4, SLARF, SLARFG, SLASET, SORGHR, + $ STREXC +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* ==== Estimate optimal workspace. ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + IF( JW.LE.2 ) THEN + LWKOPT = 1 + ELSE +* +* ==== Workspace query call to SGEHRD ==== +* + CALL SGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK1 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to SORGHR ==== +* + CALL SORGHR( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK2 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to SLAQR4 ==== +* + CALL SLAQR4( .true., .true., JW, 1, JW, T, LDT, SR, SI, 1, JW, + $ V, LDV, WORK, -1, INFQR ) + LWK3 = INT( WORK( 1 ) ) +* +* ==== Optimal workspace ==== +* + LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 ) + END IF +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = REAL( LWKOPT ) + RETURN + END IF +* +* ==== Nothing to do ... +* ... for an empty active block ... ==== + NS = 0 + ND = 0 + IF( KTOP.GT.KBOT ) + $ RETURN +* ... nor for an empty deflation window. 
==== + IF( NW.LT.1 ) + $ RETURN +* +* ==== Machine constants ==== +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = ONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( N ) / ULP ) +* +* ==== Setup deflation window ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + KWTOP = KBOT - JW + 1 + IF( KWTOP.EQ.KTOP ) THEN + S = ZERO + ELSE + S = H( KWTOP, KWTOP-1 ) + END IF +* + IF( KBOT.EQ.KWTOP ) THEN +* +* ==== 1-by-1 deflation window: not much to do ==== +* + SR( KWTOP ) = H( KWTOP, KWTOP ) + SI( KWTOP ) = ZERO + NS = 1 + ND = 0 + IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) ) + $ THEN + NS = 0 + ND = 1 + IF( KWTOP.GT.KTOP ) + $ H( KWTOP, KWTOP-1 ) = ZERO + END IF + RETURN + END IF +* +* ==== Convert to spike-triangular form. (In case of a +* . rare QR failure, this routine continues to do +* . aggressive early deflation using that part of +* . the deflation window that converged using INFQR +* . here and there to keep track.) ==== +* + CALL SLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) + CALL SCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) +* + CALL SLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) + NMIN = ILAENV( 12, 'SLAQR3', 'SV', JW, 1, JW, LWORK ) + IF( JW.GT.NMIN ) THEN + CALL SLAQR4( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ), + $ SI( KWTOP ), 1, JW, V, LDV, WORK, LWORK, INFQR ) + ELSE + CALL SLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ), + $ SI( KWTOP ), 1, JW, V, LDV, INFQR ) + END IF +* +* ==== STREXC needs a clean margin near the diagonal ==== +* + DO 10 J = 1, JW - 3 + T( J+2, J ) = ZERO + T( J+3, J ) = ZERO + 10 CONTINUE + IF( JW.GT.2 ) + $ T( JW, JW-2 ) = ZERO +* +* ==== Deflation detection loop ==== +* + NS = JW + ILST = INFQR + 1 + 20 CONTINUE + IF( ILST.LE.NS ) THEN + IF( NS.EQ.1 ) THEN + BULGE = .FALSE. 
+ ELSE + BULGE = T( NS, NS-1 ).NE.ZERO + END IF +* +* ==== Small spike tip test for deflation ==== +* + IF( .NOT.BULGE ) THEN +* +* ==== Real eigenvalue ==== +* + FOO = ABS( T( NS, NS ) ) + IF( FOO.EQ.ZERO ) + $ FOO = ABS( S ) + IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN +* +* ==== Deflatable ==== +* + NS = NS - 1 + ELSE +* +* ==== Undeflatable. Move it up out of the way. +* . (STREXC can not fail in this case.) ==== +* + IFST = NS + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + ILST = ILST + 1 + END IF + ELSE +* +* ==== Complex conjugate pair ==== +* + FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )* + $ SQRT( ABS( T( NS-1, NS ) ) ) + IF( FOO.EQ.ZERO ) + $ FOO = ABS( S ) + IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE. + $ MAX( SMLNUM, ULP*FOO ) ) THEN +* +* ==== Deflatable ==== +* + NS = NS - 2 + ELSE +* +* ==== Undflatable. Move them up out of the way. +* . Fortunately, STREXC does the right thing with +* . ILST in case of a rare exchange failure. ==== +* + IFST = NS + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + ILST = ILST + 2 + END IF + END IF +* +* ==== End deflation detection loop ==== +* + GO TO 20 + END IF +* +* ==== Return to Hessenberg form ==== +* + IF( NS.EQ.0 ) + $ S = ZERO +* + IF( NS.LT.JW ) THEN +* +* ==== sorting diagonal blocks of T improves accuracy for +* . graded matrices. Bubble sort deals well with +* . exchange failures. ==== +* + SORTED = .false. + I = NS + 1 + 30 CONTINUE + IF( SORTED ) + $ GO TO 50 + SORTED = .true. 
+* + KEND = I - 1 + I = INFQR + 1 + IF( I.EQ.NS ) THEN + K = I + 1 + ELSE IF( T( I+1, I ).EQ.ZERO ) THEN + K = I + 1 + ELSE + K = I + 2 + END IF + 40 CONTINUE + IF( K.LE.KEND ) THEN + IF( K.EQ.I+1 ) THEN + EVI = ABS( T( I, I ) ) + ELSE + EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )* + $ SQRT( ABS( T( I, I+1 ) ) ) + END IF +* + IF( K.EQ.KEND ) THEN + EVK = ABS( T( K, K ) ) + ELSE IF( T( K+1, K ).EQ.ZERO ) THEN + EVK = ABS( T( K, K ) ) + ELSE + EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )* + $ SQRT( ABS( T( K, K+1 ) ) ) + END IF +* + IF( EVI.GE.EVK ) THEN + I = K + ELSE + SORTED = .false. + IFST = I + ILST = K + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + IF( INFO.EQ.0 ) THEN + I = ILST + ELSE + I = K + END IF + END IF + IF( I.EQ.KEND ) THEN + K = I + 1 + ELSE IF( T( I+1, I ).EQ.ZERO ) THEN + K = I + 1 + ELSE + K = I + 2 + END IF + GO TO 40 + END IF + GO TO 30 + 50 CONTINUE + END IF +* +* ==== Restore shift/eigenvalue array from T ==== +* + I = JW + 60 CONTINUE + IF( I.GE.INFQR+1 ) THEN + IF( I.EQ.INFQR+1 ) THEN + SR( KWTOP+I-1 ) = T( I, I ) + SI( KWTOP+I-1 ) = ZERO + I = I - 1 + ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN + SR( KWTOP+I-1 ) = T( I, I ) + SI( KWTOP+I-1 ) = ZERO + I = I - 1 + ELSE + AA = T( I-1, I-1 ) + CC = T( I, I-1 ) + BB = T( I-1, I ) + DD = T( I, I ) + CALL SLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ), + $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ), + $ SI( KWTOP+I-1 ), CS, SN ) + I = I - 2 + END IF + GO TO 60 + END IF +* + IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN + IF( NS.GT.1 .AND. 
S.NE.ZERO ) THEN +* +* ==== Reflect spike back into lower triangle ==== +* + CALL SCOPY( NS, V, LDV, WORK, 1 ) + BETA = WORK( 1 ) + CALL SLARFG( NS, BETA, WORK( 2 ), 1, TAU ) + WORK( 1 ) = ONE +* + CALL SLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) +* + CALL SLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL SLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL SLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) +* + CALL SGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + END IF +* +* ==== Copy updated reduced window into place ==== +* + IF( KWTOP.GT.1 ) + $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 ) + CALL SLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) + CALL SCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), + $ LDH+1 ) +* +* ==== Accumulate orthogonal matrix in order update +* . H and Z, if requested. (A modified version +* . of SORGHR that accumulates block Householder +* . transformations into V directly might be +* . marginally more efficient than the following.) ==== +* + IF( NS.GT.1 .AND. 
S.NE.ZERO ) THEN + CALL SORGHR( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + CALL SGEMM( 'N', 'N', JW, NS, NS, ONE, V, LDV, T, LDT, ZERO, + $ WV, LDWV ) + CALL SLACPY( 'A', JW, NS, WV, LDWV, V, LDV ) + END IF +* +* ==== Update vertical slab in H ==== +* + IF( WANTT ) THEN + LTOP = 1 + ELSE + LTOP = KTOP + END IF + DO 70 KROW = LTOP, KWTOP - 1, NV + KLN = MIN( NV, KWTOP-KROW ) + CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), + $ LDH, V, LDV, ZERO, WV, LDWV ) + CALL SLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + 70 CONTINUE +* +* ==== Update horizontal slab in H ==== +* + IF( WANTT ) THEN + DO 80 KCOL = KBOT + 1, N, NH + KLN = MIN( NH, N-KCOL+1 ) + CALL SGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, + $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) + CALL SLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), + $ LDH ) + 80 CONTINUE + END IF +* +* ==== Update vertical slab in Z ==== +* + IF( WANTZ ) THEN + DO 90 KROW = ILOZ, IHIZ, NV + KLN = MIN( NV, IHIZ-KROW+1 ) + CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + $ LDZ, V, LDV, ZERO, WV, LDWV ) + CALL SLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), + $ LDZ ) + 90 CONTINUE + END IF + END IF +* +* ==== Return the number of deflations ... ==== +* + ND = JW - NS +* +* ==== ... and the number of shifts. (Subtracting +* . INFQR from the spike length takes care +* . of the case of a rare QR failure while +* . calculating eigenvalues of the deflation +* . window.) ==== +* + NS = NS - INFQR +* +* ==== Return optimal workspace. ==== +* + WORK( 1 ) = REAL( LWKOPT ) +* +* ==== End of SLAQR3 ==== +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaqr4.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaqr4.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,640 @@ + SUBROUTINE SLAQR4( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + REAL H( LDH, * ), WI( * ), WORK( * ), WR( * ), + $ Z( LDZ, * ) +* .. +* +* This subroutine implements one level of recursion for SLAQR0. +* It is a complete implementation of the small bulge multi-shift +* QR algorithm. It may be called by SLAQR0 and, for large enough +* deflation window size, it may be called by SLAQR3. This +* subroutine is identical to SLAQR0 except that it calls SLAQR2 +* instead of SLAQR3. +* +* Purpose +* ======= +* +* SLAQR4 computes the eigenvalues of a Hessenberg matrix H +* and, optionally, the matrices T and Z from the Schur decomposition +* H = Z T Z**T, where T is an upper quasi-triangular matrix (the +* Schur form), and Z is the orthogonal matrix of Schur vectors. +* +* Optionally Z may be postmultiplied into an input orthogonal +* matrix Q so that this routine can give the Schur factorization +* of a matrix A which has been reduced to the Hessenberg form H +* by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. +* +* Arguments +* ========= +* +* WANTT (input) LOGICAL +* = .TRUE. : the full Schur form T is required; +* = .FALSE.: only eigenvalues are required. +* +* WANTZ (input) LOGICAL +* = .TRUE. : the matrix of Schur vectors Z is required; +* = .FALSE.: Schur vectors are not required. +* +* N (input) INTEGER +* The order of the matrix H. N .GE. 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, +* H(ILO,ILO-1) is zero. ILO and IHI are normally set by a +* previous call to SGEBAL, and then passed to SGEHRD when the +* matrix output by SGEBAL is reduced to Hessenberg form. +* Otherwise, ILO and IHI should be set to 1 and N, +* respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +* If N = 0, then ILO = 1 and IHI = 0. 
+* +* H (input/output) REAL array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if INFO = 0 and WANTT is .TRUE., then H contains +* the upper quasi-triangular matrix T from the Schur +* decomposition (the Schur form); 2-by-2 diagonal blocks +* (corresponding to complex conjugate pairs of eigenvalues) +* are returned in standard form, with H(i,i) = H(i+1,i+1) +* and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is +* .FALSE., then the contents of H are unspecified on exit. +* (The output value of H when INFO.GT.0 is given under the +* description of INFO below.) +* +* This subroutine may explicitly set H(i,j) = 0 for i.GT.j and +* j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH .GE. max(1,N). +* +* WR (output) REAL array, dimension (IHI) +* WI (output) REAL array, dimension (IHI) +* The real and imaginary parts, respectively, of the computed +* eigenvalues of H(ILO:IHI,ILO:IHI) are stored WR(ILO:IHI) +* and WI(ILO:IHI). If two eigenvalues are computed as a +* complex conjugate pair, they are stored in consecutive +* elements of WR and WI, say the i-th and (i+1)th, with +* WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then +* the eigenvalues are stored in the same order as on the +* diagonal of the Schur form returned in H, with +* WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal +* block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and +* WI(i+1) = -WI(i). +* +* ILOZ (input) INTEGER +* IHIZ (input) INTEGER +* Specify the rows of Z to which transformations must be +* applied if WANTZ is .TRUE.. +* 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. +* +* Z (input/output) REAL array, dimension (LDZ,IHI) +* If WANTZ is .FALSE., then Z is not referenced. +* If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is +* replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the +* orthogonal Schur factor of H(ILO:IHI,ILO:IHI). 
+* (The output value of Z when INFO.GT.0 is given under +* the description of INFO below.) +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. if WANTZ is .TRUE. +* then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. +* +* WORK (workspace/output) REAL array, dimension LWORK +* On exit, if LWORK = -1, WORK(1) returns an estimate of +* the optimal value for LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK .GE. max(1,N) +* is sufficient, but LWORK typically as large as 6*N may +* be required for optimal performance. A workspace query +* to determine the optimal workspace size is recommended. +* +* If LWORK = -1, then SLAQR4 does a workspace query. +* In this case, SLAQR4 checks the input parameters and +* estimates the optimal workspace size for the given +* values of N, ILO and IHI. The estimate is returned +* in WORK(1). No error message related to LWORK is +* issued by XERBLA. Neither H nor Z are accessed. +* +* +* INFO (output) INTEGER +* = 0: successful exit +* .GT. 0: if INFO = i, SLAQR4 failed to compute all of +* the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +* and WI contain those eigenvalues which have been +* successfully computed. (Failures are rare.) +* +* If INFO .GT. 0 and WANT is .FALSE., then on exit, +* the remaining unconverged eigenvalues are the eigen- +* values of the upper Hessenberg matrix rows and +* columns ILO through INFO of the final, output +* value of H. +* +* If INFO .GT. 0 and WANTT is .TRUE., then on exit +* +* (*) (initial value of H)*U = U*(final value of H) +* +* where U is an orthogonal matrix. The final +* value of H is upper Hessenberg and quasi-triangular +* in rows and columns INFO+1 through IHI. +* +* If INFO .GT. 0 and WANTZ is .TRUE., then on exit +* +* (final value of Z(ILO:IHI,ILOZ:IHIZ) +* = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U +* +* where U is the orthogonal matrix in (*) (regard- +* less of the value of WANTT.) +* +* If INFO .GT. 
0 and WANTZ is .FALSE., then Z is not +* accessed. +* +* ================================================================ +* Based on contributions by +* Karen Braman and Ralph Byers, Department of Mathematics, +* University of Kansas, USA +* +* ================================================================ +* References: +* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +* Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +* Performance, SIAM Journal of Matrix Analysis, volume 23, pages +* 929--947, 2002. +* +* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +* Algorithm Part II: Aggressive Early Deflation, SIAM Journal +* of Matrix Analysis, volume 23, pages 948--973, 2002. +* +* ================================================================ +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . SLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== +* +* ==== Exceptional deflation windows: try to cure rare +* . slow convergence by increasing the size of the +* . deflation window after KEXNW iterations. ===== +* +* ==== Exceptional shifts: try to cure rare slow convergence +* . with ad-hoc exceptional shifts every KEXSH iterations. +* . The constants WILK1 and WILK2 are used to form the +* . exceptional shifts. ==== +* + INTEGER NTINY + PARAMETER ( NTINY = 11 ) + INTEGER KEXNW, KEXSH + PARAMETER ( KEXNW = 5, KEXSH = 6 ) + REAL WILK1, WILK2 + PARAMETER ( WILK1 = 0.75e0, WILK2 = -0.4375e0 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) +* .. +* .. Local Scalars .. + REAL AA, BB, CC, CS, DD, SN, SS, SWAP + INTEGER I, INF, IT, ITMAX, K, KACC22, KBOT, KDU, KS, + $ KT, KTOP, KU, KV, KWH, KWTOP, KWV, LD, LS, + $ LWKOPT, NDFL, NH, NHO, NIBBLE, NMIN, NS, NSMAX, + $ NSR, NVE, NW, NWMAX, NWR + LOGICAL NWINC, SORTED + CHARACTER JBCMPZ*2 +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Local Arrays .. + REAL ZDUM( 1, 1 ) +* .. 
+* .. External Subroutines .. + EXTERNAL SLACPY, SLAHQR, SLANV2, SLAQR2, SLAQR5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, MAX, MIN, MOD, REAL +* .. +* .. Executable Statements .. + INFO = 0 +* +* ==== Quick return for N = 0: nothing to do. ==== +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = ONE + RETURN + END IF +* +* ==== Set up job flags for ILAENV. ==== +* + IF( WANTT ) THEN + JBCMPZ( 1: 1 ) = 'S' + ELSE + JBCMPZ( 1: 1 ) = 'E' + END IF + IF( WANTZ ) THEN + JBCMPZ( 2: 2 ) = 'V' + ELSE + JBCMPZ( 2: 2 ) = 'N' + END IF +* +* ==== Tiny matrices must use SLAHQR. ==== +* + IF( N.LE.NTINY ) THEN +* +* ==== Estimate optimal workspace. ==== +* + LWKOPT = 1 + IF( LWORK.NE.-1 ) + $ CALL SLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, INFO ) + ELSE +* +* ==== Use small bulge multi-shift QR with aggressive early +* . deflation on larger-than-tiny matrices. ==== +* +* ==== Hope for the best. ==== +* + INFO = 0 +* +* ==== NWR = recommended deflation window size. At this +* . point, N .GT. NTINY = 11, so there is enough +* . subdiagonal workspace for NWR.GE.2 as required. +* . (In fact, there is enough subdiagonal space for +* . NWR.GE.3.) ==== +* + NWR = ILAENV( 13, 'SLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NWR = MAX( 2, NWR ) + NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR ) + NW = NWR +* +* ==== NSR = recommended number of simultaneous shifts. +* . At this point N .GT. NTINY = 11, so there is at +* . enough subdiagonal workspace for NSR to be even +* . and greater than or equal to two as required. 
==== +* + NSR = ILAENV( 15, 'SLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NSR = MIN( NSR, ( N+6 ) / 9, IHI-ILO ) + NSR = MAX( 2, NSR-MOD( NSR, 2 ) ) +* +* ==== Estimate optimal workspace ==== +* +* ==== Workspace query call to SLAQR2 ==== +* + CALL SLAQR2( WANTT, WANTZ, N, ILO, IHI, NWR+1, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, WR, WI, H, LDH, N, H, LDH, + $ N, H, LDH, WORK, -1 ) +* +* ==== Optimal workspace = MAX(SLAQR5, SLAQR2) ==== +* + LWKOPT = MAX( 3*NSR / 2, INT( WORK( 1 ) ) ) +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = REAL( LWKOPT ) + RETURN + END IF +* +* ==== SLAHQR/SLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'SLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== Nibble crossover point ==== +* + NIBBLE = ILAENV( 14, 'SLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NIBBLE = MAX( 0, NIBBLE ) +* +* ==== Accumulate reflections during ttswp? Use block +* . 2-by-2 structure during matrix-matrix multiply? ==== +* + KACC22 = ILAENV( 16, 'SLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + KACC22 = MAX( 0, KACC22 ) + KACC22 = MIN( 2, KACC22 ) +* +* ==== NWMAX = the largest possible deflation window for +* . which there is sufficient workspace. ==== +* + NWMAX = MIN( ( N-1 ) / 3, LWORK / 2 ) +* +* ==== NSMAX = the Largest number of simultaneous shifts +* . for which there is sufficient workspace. ==== +* + NSMAX = MIN( ( N+6 ) / 9, 2*LWORK / 3 ) + NSMAX = NSMAX - MOD( NSMAX, 2 ) +* +* ==== NDFL: an iteration count restarted at deflation. 
==== +* + NDFL = 1 +* +* ==== ITMAX = iteration limit ==== +* + ITMAX = MAX( 30, 2*KEXSH )*MAX( 10, ( IHI-ILO+1 ) ) +* +* ==== Last row and column in the active block ==== +* + KBOT = IHI +* +* ==== Main Loop ==== +* + DO 80 IT = 1, ITMAX +* +* ==== Done when KBOT falls below ILO ==== +* + IF( KBOT.LT.ILO ) + $ GO TO 90 +* +* ==== Locate active block ==== +* + DO 10 K = KBOT, ILO + 1, -1 + IF( H( K, K-1 ).EQ.ZERO ) + $ GO TO 20 + 10 CONTINUE + K = ILO + 20 CONTINUE + KTOP = K +* +* ==== Select deflation window size ==== +* + NH = KBOT - KTOP + 1 + IF( NDFL.LT.KEXNW .OR. NH.LT.NW ) THEN +* +* ==== Typical deflation window. If possible and +* . advisable, nibble the entire active block. +* . If not, use size NWR or NWR+1 depending upon +* . which has the smaller corresponding subdiagonal +* . entry (a heuristic). ==== +* + NWINC = .TRUE. + IF( NH.LE.MIN( NMIN, NWMAX ) ) THEN + NW = NH + ELSE + NW = MIN( NWR, NH, NWMAX ) + IF( NW.LT.NWMAX ) THEN + IF( NW.GE.NH-1 ) THEN + NW = NH + ELSE + KWTOP = KBOT - NW + 1 + IF( ABS( H( KWTOP, KWTOP-1 ) ).GT. + $ ABS( H( KWTOP-1, KWTOP-2 ) ) )NW = NW + 1 + END IF + END IF + END IF + ELSE +* +* ==== Exceptional deflation window. If there have +* . been no deflations in KEXNW or more iterations, +* . then vary the deflation window size. At first, +* . because, larger windows are, in general, more +* . powerful than smaller ones, rapidly increase the +* . window up to the maximum reasonable and possible. +* . Then maybe try a slightly smaller window. ==== +* + IF( NWINC .AND. NW.LT.MIN( NWMAX, NH ) ) THEN + NW = MIN( NWMAX, NH, 2*NW ) + ELSE + NWINC = .FALSE. + IF( NW.EQ.NH .AND. NH.GT.2 ) + $ NW = NH - 1 + END IF + END IF +* +* ==== Aggressive early deflation: +* . split workspace under the subdiagonal into +* . - an nw-by-nw work array V in the lower +* . left-hand-corner, +* . - an NW-by-at-least-NW-but-more-is-better +* . (NW-by-NHO) horizontal work array along +* . the bottom edge, +* . 
- an at-least-NW-but-more-is-better (NHV-by-NW) +* . vertical work array along the left-hand-edge. +* . ==== +* + KV = N - NW + 1 + KT = NW + 1 + NHO = ( N-NW-1 ) - KT + 1 + KWV = NW + 2 + NVE = ( N-NW ) - KWV + 1 +* +* ==== Aggressive early deflation ==== +* + CALL SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, WR, WI, H( KV, 1 ), LDH, + $ NHO, H( KV, KT ), LDH, NVE, H( KWV, 1 ), LDH, + $ WORK, LWORK ) +* +* ==== Adjust KBOT accounting for new deflations. ==== +* + KBOT = KBOT - LD +* +* ==== KS points to the shifts. ==== +* + KS = KBOT - LS + 1 +* +* ==== Skip an expensive QR sweep if there is a (partly +* . heuristic) reason to expect that many eigenvalues +* . will deflate without it. Here, the QR sweep is +* . skipped if many eigenvalues have just been deflated +* . or if the remaining active block is small. +* + IF( ( LD.EQ.0 ) .OR. ( ( 100*LD.LE.NW*NIBBLE ) .AND. ( KBOT- + $ KTOP+1.GT.MIN( NMIN, NWMAX ) ) ) ) THEN +* +* ==== NS = nominal number of simultaneous shifts. +* . This may be lowered (slightly) if SLAQR2 +* . did not provide that many shifts. ==== +* + NS = MIN( NSMAX, NSR, MAX( 2, KBOT-KTOP ) ) + NS = NS - MOD( NS, 2 ) +* +* ==== If there have been no deflations +* . in a multiple of KEXSH iterations, +* . then try exceptional shifts. +* . Otherwise use shifts provided by +* . SLAQR2 above or from the eigenvalues +* . of a trailing principal submatrix. ==== +* + IF( MOD( NDFL, KEXSH ).EQ.0 ) THEN + KS = KBOT - NS + 1 + DO 30 I = KBOT, MAX( KS+1, KTOP+2 ), -2 + SS = ABS( H( I, I-1 ) ) + ABS( H( I-1, I-2 ) ) + AA = WILK1*SS + H( I, I ) + BB = SS + CC = WILK2*SS + DD = AA + CALL SLANV2( AA, BB, CC, DD, WR( I-1 ), WI( I-1 ), + $ WR( I ), WI( I ), CS, SN ) + 30 CONTINUE + IF( KS.EQ.KTOP ) THEN + WR( KS+1 ) = H( KS+1, KS+1 ) + WI( KS+1 ) = ZERO + WR( KS ) = WR( KS+1 ) + WI( KS ) = WI( KS+1 ) + END IF + ELSE +* +* ==== Got NS/2 or fewer shifts? Use SLAHQR +* . on a trailing principal submatrix to +* . get more. 
(Since NS.LE.NSMAX.LE.(N+6)/9, +* . there is enough space below the subdiagonal +* . to fit an NS-by-NS scratch array.) ==== +* + IF( KBOT-KS+1.LE.NS / 2 ) THEN + KS = KBOT - NS + 1 + KT = N - NS + 1 + CALL SLACPY( 'A', NS, NS, H( KS, KS ), LDH, + $ H( KT, 1 ), LDH ) + CALL SLAHQR( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, WR( KS ), WI( KS ), + $ 1, 1, ZDUM, 1, INF ) + KS = KS + INF +* +* ==== In case of a rare QR failure use +* . eigenvalues of the trailing 2-by-2 +* . principal submatrix. ==== +* + IF( KS.GE.KBOT ) THEN + AA = H( KBOT-1, KBOT-1 ) + CC = H( KBOT, KBOT-1 ) + BB = H( KBOT-1, KBOT ) + DD = H( KBOT, KBOT ) + CALL SLANV2( AA, BB, CC, DD, WR( KBOT-1 ), + $ WI( KBOT-1 ), WR( KBOT ), + $ WI( KBOT ), CS, SN ) + KS = KBOT - 1 + END IF + END IF +* + IF( KBOT-KS+1.GT.NS ) THEN +* +* ==== Sort the shifts (Helps a little) +* . Bubble sort keeps complex conjugate +* . pairs together. ==== +* + SORTED = .false. + DO 50 K = KBOT, KS + 1, -1 + IF( SORTED ) + $ GO TO 60 + SORTED = .true. + DO 40 I = KS, K - 1 + IF( ABS( WR( I ) )+ABS( WI( I ) ).LT. + $ ABS( WR( I+1 ) )+ABS( WI( I+1 ) ) ) THEN + SORTED = .false. +* + SWAP = WR( I ) + WR( I ) = WR( I+1 ) + WR( I+1 ) = SWAP +* + SWAP = WI( I ) + WI( I ) = WI( I+1 ) + WI( I+1 ) = SWAP + END IF + 40 CONTINUE + 50 CONTINUE + 60 CONTINUE + END IF +* +* ==== Shuffle shifts into pairs of real shifts +* . and pairs of complex conjugate shifts +* . assuming complex conjugate shifts are +* . already adjacent to one another. (Yes, +* . they are.) ==== +* + DO 70 I = KBOT, KS + 2, -2 + IF( WI( I ).NE.-WI( I-1 ) ) THEN +* + SWAP = WR( I ) + WR( I ) = WR( I-1 ) + WR( I-1 ) = WR( I-2 ) + WR( I-2 ) = SWAP +* + SWAP = WI( I ) + WI( I ) = WI( I-1 ) + WI( I-1 ) = WI( I-2 ) + WI( I-2 ) = SWAP + END IF + 70 CONTINUE + END IF +* +* ==== If there are only two shifts and both are +* . real, then use only one. ==== +* + IF( KBOT-KS+1.EQ.2 ) THEN + IF( WI( KBOT ).EQ.ZERO ) THEN + IF( ABS( WR( KBOT )-H( KBOT, KBOT ) ).LT. 
+ $ ABS( WR( KBOT-1 )-H( KBOT, KBOT ) ) ) THEN + WR( KBOT-1 ) = WR( KBOT ) + ELSE + WR( KBOT ) = WR( KBOT-1 ) + END IF + END IF + END IF +* +* ==== Use up to NS of the the smallest magnatiude +* . shifts. If there aren't NS shifts available, +* . then use them all, possibly dropping one to +* . make the number of shifts even. ==== +* + NS = MIN( NS, KBOT-KS+1 ) + NS = NS - MOD( NS, 2 ) + KS = KBOT - NS + 1 +* +* ==== Small-bulge multi-shift QR sweep: +* . split workspace under the subdiagonal into +* . - a KDU-by-KDU work array U in the lower +* . left-hand-corner, +* . - a KDU-by-at-least-KDU-but-more-is-better +* . (KDU-by-NHo) horizontal work array WH along +* . the bottom edge, +* . - and an at-least-KDU-but-more-is-better-by-KDU +* . (NVE-by-KDU) vertical work WV arrow along +* . the left-hand-edge. ==== +* + KDU = 3*NS - 3 + KU = N - KDU + 1 + KWH = KDU + 1 + NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1 + KWV = KDU + 4 + NVE = N - KDU - KWV + 1 +* +* ==== Small-bulge multi-shift QR sweep ==== +* + CALL SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NS, + $ WR( KS ), WI( KS ), H, LDH, ILOZ, IHIZ, Z, + $ LDZ, WORK, 3, H( KU, 1 ), LDH, NVE, + $ H( KWV, 1 ), LDH, NHO, H( KU, KWH ), LDH ) + END IF +* +* ==== Note progress (or the lack of it). ==== +* + IF( LD.GT.0 ) THEN + NDFL = 1 + ELSE + NDFL = NDFL + 1 + END IF +* +* ==== End of main loop ==== + 80 CONTINUE +* +* ==== Iteration limit exceeded. Set INFO to show where +* . the problem occurred and exit. ==== +* + INFO = KBOT + 90 CONTINUE + END IF +* +* ==== Return the optimal value of LWORK. 
==== +* + WORK( 1 ) = REAL( LWKOPT ) +* +* ==== End of SLAQR4 ==== +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaqr5.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaqr5.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,812 @@ + SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, + $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, + $ LDU, NV, WV, LDWV, NH, WH, LDWH ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, + $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ), + $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ), + $ Z( LDZ, * ) +* .. +* +* This auxiliary subroutine called by SLAQR0 performs a +* single small-bulge multi-shift QR sweep. +* +* WANTT (input) logical scalar +* WANTT = .true. if the quasi-triangular Schur factor +* is being computed. WANTT is set to .false. otherwise. +* +* WANTZ (input) logical scalar +* WANTZ = .true. if the orthogonal Schur factor is being +* computed. WANTZ is set to .false. otherwise. +* +* KACC22 (input) integer with value 0, 1, or 2. +* Specifies the computation mode of far-from-diagonal +* orthogonal updates. +* = 0: SLAQR5 does not accumulate reflections and does not +* use matrix-matrix multiply to update far-from-diagonal +* matrix entries. +* = 1: SLAQR5 accumulates reflections and uses matrix-matrix +* multiply to update the far-from-diagonal matrix entries. +* = 2: SLAQR5 accumulates reflections, uses matrix-matrix +* multiply to update the far-from-diagonal matrix entries, +* and takes advantage of 2-by-2 block structure during +* matrix multiplies. +* +* N (input) integer scalar +* N is the order of the Hessenberg matrix H upon which this +* subroutine operates. 
+*
+*     KTOP   (input) integer scalar
+*     KBOT   (input) integer scalar
+*             These are the first and last rows and columns of an
+*             isolated diagonal block upon which the QR sweep is to be
+*             applied. It is assumed without a check that
+*             either KTOP = 1 or H(KTOP,KTOP-1) = 0
+*             and
+*             either KBOT = N or H(KBOT+1,KBOT) = 0.
+*
+*     NSHFTS (input) integer scalar
+*             NSHFTS gives the number of simultaneous shifts.  NSHFTS
+*             must be positive and even.
+*
+*     SR     (input) REAL array of size (NSHFTS)
+*     SI     (input) REAL array of size (NSHFTS)
+*             SR contains the real parts and SI contains the imaginary
+*             parts of the NSHFTS shifts of origin that define the
+*             multi-shift QR sweep.
+*
+*     H      (input/output) REAL array of size (LDH,N)
+*             On input H contains a Hessenberg matrix.  On output a
+*             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
+*             to the isolated diagonal block in rows and columns KTOP
+*             through KBOT.
+*
+*     LDH    (input) integer scalar
+*             LDH is the leading dimension of H just as declared in the
+*             calling procedure.  LDH.GE.MAX(1,N).
+*
+*     ILOZ   (input) INTEGER
+*     IHIZ   (input) INTEGER
+*             Specify the rows of Z to which transformations must be
+*             applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
+*
+*     Z      (input/output) REAL array of size (LDZ,IHI)
+*             If WANTZ = .TRUE., then the QR Sweep orthogonal
+*             similarity transformation is accumulated into
+*             Z(ILOZ:IHIZ,ILO:IHI) from the right.
+*             If WANTZ = .FALSE., then Z is unreferenced.
+*
+*     LDZ    (input) integer scalar
+*             LDZ is the leading dimension of Z just as declared in
+*             the calling procedure.  LDZ.GE.N.
+*
+*     V      (workspace) REAL array of size (LDV,NSHFTS/2)
+*
+*     LDV    (input) integer scalar
+*             LDV is the leading dimension of V as declared in the
+*             calling procedure.  LDV.GE.3.
+*
+*     U      (workspace) REAL array of size
+*             (LDU,3*NSHFTS-3)
+*
+*     LDU    (input) integer scalar
+*             LDU is the leading dimension of U just as declared
+*             in the calling subroutine.  LDU.GE.3*NSHFTS-3.
+* +* NH (input) integer scalar +* NH is the number of columns in array WH available for +* workspace. NH.GE.1. +* +* WH (workspace) REAL array of size (LDWH,NH) +* +* LDWH (input) integer scalar +* Leading dimension of WH just as declared in the +* calling procedure. LDWH.GE.3*NSHFTS-3. +* +* NV (input) integer scalar +* NV is the number of rows in WV agailable for workspace. +* NV.GE.1. +* +* WV (workspace) REAL array of size +* (LDWV,3*NSHFTS-3) +* +* LDWV (input) integer scalar +* LDWV is the leading dimension of WV as declared in the +* in the calling subroutine. LDWV.GE.NV. +* +* +* ================================================================ +* Based on contributions by +* Karen Braman and Ralph Byers, Department of Mathematics, +* University of Kansas, USA +* +* ============================================================ +* Reference: +* +* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +* Algorithm Part I: Maintaining Well Focused Shifts, and +* Level 3 Performance, SIAM Journal of Matrix Analysis, +* volume 23, pages 929--947, 2002. +* +* ============================================================ +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) +* .. +* .. Local Scalars .. + REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM, + $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2, + $ ULP + INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN, + $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS, + $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL, + $ NS, NU + LOGICAL ACCUM, BLK22, BMP22 +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. +* + INTRINSIC ABS, MAX, MIN, MOD, REAL +* .. +* .. Local Arrays .. + REAL VT( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLABAD, SLACPY, SLAQR1, SLARFG, SLASET, + $ STRMM +* .. +* .. Executable Statements .. +* +* ==== If there are no shifts, then there is nothing to do. 
==== +* + IF( NSHFTS.LT.2 ) + $ RETURN +* +* ==== If the active block is empty or 1-by-1, then there +* . is nothing to do. ==== +* + IF( KTOP.GE.KBOT ) + $ RETURN +* +* ==== Shuffle shifts into pairs of real shifts and pairs +* . of complex conjugate shifts assuming complex +* . conjugate shifts are already adjacent to one +* . another. ==== +* + DO 10 I = 1, NSHFTS - 2, 2 + IF( SI( I ).NE.-SI( I+1 ) ) THEN +* + SWAP = SR( I ) + SR( I ) = SR( I+1 ) + SR( I+1 ) = SR( I+2 ) + SR( I+2 ) = SWAP +* + SWAP = SI( I ) + SI( I ) = SI( I+1 ) + SI( I+1 ) = SI( I+2 ) + SI( I+2 ) = SWAP + END IF + 10 CONTINUE +* +* ==== NSHFTS is supposed to be even, but if is odd, +* . then simply reduce it by one. The shuffle above +* . ensures that the dropped shift is real and that +* . the remaining shifts are paired. ==== +* + NS = NSHFTS - MOD( NSHFTS, 2 ) +* +* ==== Machine constants for deflation ==== +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = ONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( N ) / ULP ) +* +* ==== Use accumulated reflections to update far-from-diagonal +* . entries ? ==== +* + ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 ) +* +* ==== If so, exploit the 2-by-2 block structure? ==== +* + BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 ) +* +* ==== clear trash ==== +* + IF( KTOP+2.LE.KBOT ) + $ H( KTOP+2, KTOP ) = ZERO +* +* ==== NBMPS = number of 2-shift bulges in the chain ==== +* + NBMPS = NS / 2 +* +* ==== KDU = width of slab ==== +* + KDU = 6*NBMPS - 3 +* +* ==== Create and chase chains of NBMPS bulges ==== +* + DO 220 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2 + NDCOL = INCOL + KDU + IF( ACCUM ) + $ CALL SLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU ) +* +* ==== Near-the-diagonal bulge chase. The following loop +* . performs the near-the-diagonal part of a small bulge +* . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal +* . chunk extends from column INCOL to column NDCOL +* . 
(including both column INCOL and column NDCOL). The +* . following loop chases a 3*NBMPS column long chain of +* . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL +* . may be less than KTOP and and NDCOL may be greater than +* . KBOT indicating phantom columns from which to chase +* . bulges before they are actually introduced or to which +* . to chase bulges beyond column KBOT.) ==== +* + DO 150 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 ) +* +* ==== Bulges number MTOP to MBOT are active double implicit +* . shift bulges. There may or may not also be small +* . 2-by-2 bulge, if there is room. The inactive bulges +* . (if any) must wait until the active bulges have moved +* . down the diagonal to make room. The phantom matrix +* . paradigm described above helps keep track. ==== +* + MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 ) + MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 ) + M22 = MBOT + 1 + BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ. + $ ( KBOT-2 ) +* +* ==== Generate reflections to chase the chain right +* . one column. (The minimum value of K is KTOP-1.) ==== +* + DO 20 M = MTOP, MBOT + K = KRCOL + 3*( M-1 ) + IF( K.EQ.KTOP-1 ) THEN + CALL SLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ), + $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ), + $ V( 1, M ) ) + ALPHA = V( 1, M ) + CALL SLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) ) + ELSE + BETA = H( K+1, K ) + V( 2, M ) = H( K+2, K ) + V( 3, M ) = H( K+3, K ) + CALL SLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) ) +* +* ==== A Bulge may collapse because of vigilant +* . deflation or destructive underflow. (The +* . initial bulge is always collapsed.) Use +* . the two-small-subdiagonals trick to try +* . to get it started again. If V(2,M).NE.0 and +* . V(3,M) = H(K+3,K+1) = H(K+3,K+2) = 0, then +* . this bulge is collapsing into a zero +* . subdiagonal. It will be restarted next +* . trip through the loop.) +* + IF( V( 1, M ).NE.ZERO .AND. + $ ( V( 3, M ).NE.ZERO .OR. ( H( K+3, + $ K+1 ).EQ.ZERO .AND. 
H( K+3, K+2 ).EQ.ZERO ) ) ) + $ THEN +* +* ==== Typical case: not collapsed (yet). ==== +* + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + ELSE +* +* ==== Atypical case: collapsed. Attempt to +* . reintroduce ignoring H(K+1,K). If the +* . fill resulting from the new reflector +* . is too large, then abandon it. +* . Otherwise, use the new one. ==== +* + CALL SLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ), + $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ), + $ VT ) + SCL = ABS( VT( 1 ) ) + ABS( VT( 2 ) ) + + $ ABS( VT( 3 ) ) + IF( SCL.NE.ZERO ) THEN + VT( 1 ) = VT( 1 ) / SCL + VT( 2 ) = VT( 2 ) / SCL + VT( 3 ) = VT( 3 ) / SCL + END IF +* +* ==== The following is the traditional and +* . conservative two-small-subdiagonals +* . test. ==== +* . + IF( ABS( H( K+1, K ) )*( ABS( VT( 2 ) )+ + $ ABS( VT( 3 ) ) ).GT.ULP*ABS( VT( 1 ) )* + $ ( ABS( H( K, K ) )+ABS( H( K+1, + $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN +* +* ==== Starting a new bulge here would +* . create non-negligible fill. If +* . the old reflector is diagonal (only +* . possible with underflows), then +* . change it to I. Otherwise, use +* . it with trepidation. ==== +* + IF( V( 2, M ).EQ.ZERO .AND. V( 3, M ).EQ.ZERO ) + $ THEN + V( 1, M ) = ZERO + ELSE + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + END IF + ELSE +* +* ==== Stating a new bulge here would +* . create only negligible fill. +* . Replace the old reflector with +* . the new one. ==== +* + ALPHA = VT( 1 ) + CALL SLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) + REFSUM = H( K+1, K ) + H( K+2, K )*VT( 2 ) + + $ H( K+3, K )*VT( 3 ) + H( K+1, K ) = H( K+1, K ) - VT( 1 )*REFSUM + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + V( 1, M ) = VT( 1 ) + V( 2, M ) = VT( 2 ) + V( 3, M ) = VT( 3 ) + END IF + END IF + END IF + 20 CONTINUE +* +* ==== Generate a 2-by-2 reflection, if needed. 
==== +* + K = KRCOL + 3*( M22-1 ) + IF( BMP22 ) THEN + IF( K.EQ.KTOP-1 ) THEN + CALL SLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ), + $ SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ), + $ V( 1, M22 ) ) + BETA = V( 1, M22 ) + CALL SLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) + ELSE + BETA = H( K+1, K ) + V( 2, M22 ) = H( K+2, K ) + CALL SLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + END IF + ELSE +* +* ==== Initialize V(1,M22) here to avoid possible undefined +* . variable problems later. ==== +* + V( 1, M22 ) = ZERO + END IF +* +* ==== Multiply H by reflections from the left ==== +* + IF( ACCUM ) THEN + JBOT = MIN( NDCOL, KBOT ) + ELSE IF( WANTT ) THEN + JBOT = N + ELSE + JBOT = KBOT + END IF + DO 40 J = MAX( KTOP, KRCOL ), JBOT + MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 ) + DO 30 M = MTOP, MEND + K = KRCOL + 3*( M-1 ) + REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )* + $ H( K+2, J )+V( 3, M )*H( K+3, J ) ) + H( K+1, J ) = H( K+1, J ) - REFSUM + H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) + H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M ) + 30 CONTINUE + 40 CONTINUE + IF( BMP22 ) THEN + K = KRCOL + 3*( M22-1 ) + DO 50 J = MAX( K+1, KTOP ), JBOT + REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )* + $ H( K+2, J ) ) + H( K+1, J ) = H( K+1, J ) - REFSUM + H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 ) + 50 CONTINUE + END IF +* +* ==== Multiply H by reflections from the right. +* . Delay filling in the last row until the +* . vigilant deflation check is complete. 
==== +* + IF( ACCUM ) THEN + JTOP = MAX( KTOP, INCOL ) + ELSE IF( WANTT ) THEN + JTOP = 1 + ELSE + JTOP = KTOP + END IF + DO 90 M = MTOP, MBOT + IF( V( 1, M ).NE.ZERO ) THEN + K = KRCOL + 3*( M-1 ) + DO 60 J = JTOP, MIN( KBOT, K+3 ) + REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* + $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) + H( J, K+1 ) = H( J, K+1 ) - REFSUM + H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M ) + H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M ) + 60 CONTINUE +* + IF( ACCUM ) THEN +* +* ==== Accumulate U. (If necessary, update Z later +* . with with an efficient matrix-matrix +* . multiply.) ==== +* + KMS = K - INCOL + DO 70 J = MAX( 1, KTOP-INCOL ), KDU + REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* + $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) + U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM + U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M ) + U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M ) + 70 CONTINUE + ELSE IF( WANTZ ) THEN +* +* ==== U is not accumulated, so update Z +* . now by multiplying by reflections +* . from the right. ==== +* + DO 80 J = ILOZ, IHIZ + REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* + $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) + Z( J, K+1 ) = Z( J, K+1 ) - REFSUM + Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M ) + Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M ) + 80 CONTINUE + END IF + END IF + 90 CONTINUE +* +* ==== Special case: 2-by-2 reflection (if needed) ==== +* + K = KRCOL + 3*( M22-1 ) + IF( BMP22 .AND. 
( V( 1, M22 ).NE.ZERO ) ) THEN + DO 100 J = JTOP, MIN( KBOT, K+3 ) + REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* + $ H( J, K+2 ) ) + H( J, K+1 ) = H( J, K+1 ) - REFSUM + H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 ) + 100 CONTINUE +* + IF( ACCUM ) THEN + KMS = K - INCOL + DO 110 J = MAX( 1, KTOP-INCOL ), KDU + REFSUM = V( 1, M22 )*( U( J, KMS+1 )+V( 2, M22 )* + $ U( J, KMS+2 ) ) + U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM + U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M22 ) + 110 CONTINUE + ELSE IF( WANTZ ) THEN + DO 120 J = ILOZ, IHIZ + REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )* + $ Z( J, K+2 ) ) + Z( J, K+1 ) = Z( J, K+1 ) - REFSUM + Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 ) + 120 CONTINUE + END IF + END IF +* +* ==== Vigilant deflation check ==== +* + MSTART = MTOP + IF( KRCOL+3*( MSTART-1 ).LT.KTOP ) + $ MSTART = MSTART + 1 + MEND = MBOT + IF( BMP22 ) + $ MEND = MEND + 1 + IF( KRCOL.EQ.KBOT-2 ) + $ MEND = MEND + 1 + DO 130 M = MSTART, MEND + K = MIN( KBOT-1, KRCOL+3*( M-1 ) ) +* +* ==== The following convergence test requires that +* . the tradition small-compared-to-nearby-diagonals +* . criterion and the Ahues & Tisseur (LAWN 122, 1997) +* . criteria both be satisfied. The latter improves +* . accuracy in some examples. Falling back on an +* . alternate convergence criterion when TST1 or TST2 +* . is zero (as done here) is traditional but probably +* . unnecessary. 
==== +* + IF( H( K+1, K ).NE.ZERO ) THEN + TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) ) + IF( TST1.EQ.ZERO ) THEN + IF( K.GE.KTOP+1 ) + $ TST1 = TST1 + ABS( H( K, K-1 ) ) + IF( K.GE.KTOP+2 ) + $ TST1 = TST1 + ABS( H( K, K-2 ) ) + IF( K.GE.KTOP+3 ) + $ TST1 = TST1 + ABS( H( K, K-3 ) ) + IF( K.LE.KBOT-2 ) + $ TST1 = TST1 + ABS( H( K+2, K+1 ) ) + IF( K.LE.KBOT-3 ) + $ TST1 = TST1 + ABS( H( K+3, K+1 ) ) + IF( K.LE.KBOT-4 ) + $ TST1 = TST1 + ABS( H( K+4, K+1 ) ) + END IF + IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) ) + $ THEN + H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) ) + H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) ) + H11 = MAX( ABS( H( K+1, K+1 ) ), + $ ABS( H( K, K )-H( K+1, K+1 ) ) ) + H22 = MIN( ABS( H( K+1, K+1 ) ), + $ ABS( H( K, K )-H( K+1, K+1 ) ) ) + SCL = H11 + H12 + TST2 = H22*( H11 / SCL ) +* + IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE. + $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO + END IF + END IF + 130 CONTINUE +* +* ==== Fill in the last row of each bulge. ==== +* + MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 ) + DO 140 M = MTOP, MEND + K = KRCOL + 3*( M-1 ) + REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 ) + H( K+4, K+1 ) = -REFSUM + H( K+4, K+2 ) = -REFSUM*V( 2, M ) + H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*V( 3, M ) + 140 CONTINUE +* +* ==== End of near-the-diagonal bulge chase. ==== +* + 150 CONTINUE +* +* ==== Use U (if accumulated) to update far-from-diagonal +* . entries in H. If required, use U to update Z as +* . well. ==== +* + IF( ACCUM ) THEN + IF( WANTT ) THEN + JTOP = 1 + JBOT = N + ELSE + JTOP = KTOP + JBOT = KBOT + END IF + IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR. + $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN +* +* ==== Updates not exploiting the 2-by-2 block +* . structure of U. K1 and NU keep track of +* . the location and size of U in the special +* . cases of introducing bulges and chasing +* . bulges off the bottom. In these special +* . cases and in case the number of shifts +* . 
is NS = 2, there is no 2-by-2 block +* . structure to exploit. ==== +* + K1 = MAX( 1, KTOP-INCOL ) + NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1 +* +* ==== Horizontal Multiply ==== +* + DO 160 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH + JLEN = MIN( NH, JBOT-JCOL+1 ) + CALL SGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ), + $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH, + $ LDWH ) + CALL SLACPY( 'ALL', NU, JLEN, WH, LDWH, + $ H( INCOL+K1, JCOL ), LDH ) + 160 CONTINUE +* +* ==== Vertical multiply ==== +* + DO 170 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV + JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW ) + CALL SGEMM( 'N', 'N', JLEN, NU, NU, ONE, + $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ), + $ LDU, ZERO, WV, LDWV ) + CALL SLACPY( 'ALL', JLEN, NU, WV, LDWV, + $ H( JROW, INCOL+K1 ), LDH ) + 170 CONTINUE +* +* ==== Z multiply (also vertical) ==== +* + IF( WANTZ ) THEN + DO 180 JROW = ILOZ, IHIZ, NV + JLEN = MIN( NV, IHIZ-JROW+1 ) + CALL SGEMM( 'N', 'N', JLEN, NU, NU, ONE, + $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ), + $ LDU, ZERO, WV, LDWV ) + CALL SLACPY( 'ALL', JLEN, NU, WV, LDWV, + $ Z( JROW, INCOL+K1 ), LDZ ) + 180 CONTINUE + END IF + ELSE +* +* ==== Updates exploiting U's 2-by-2 block structure. +* . (I2, I4, J2, J4 are the last rows and columns +* . of the blocks.) ==== +* + I2 = ( KDU+1 ) / 2 + I4 = KDU + J2 = I4 - I2 + J4 = KDU +* +* ==== KZS and KNZ deal with the band of zeros +* . along the diagonal of one of the triangular +* . blocks. ==== +* + KZS = ( J4-J2 ) - ( NS+1 ) + KNZ = NS + 1 +* +* ==== Horizontal multiply ==== +* + DO 190 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH + JLEN = MIN( NH, JBOT-JCOL+1 ) +* +* ==== Copy bottom of H to top+KZS of scratch ==== +* (The first KZS rows get multiplied by zero.) 
==== +* + CALL SLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ), + $ LDH, WH( KZS+1, 1 ), LDWH ) +* +* ==== Multiply by U21' ==== +* + CALL SLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH ) + CALL STRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE, + $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ), + $ LDWH ) +* +* ==== Multiply top of H by U11' ==== +* + CALL SGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU, + $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH ) +* +* ==== Copy top of H bottom of WH ==== +* + CALL SLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH, + $ WH( I2+1, 1 ), LDWH ) +* +* ==== Multiply by U21' ==== +* + CALL STRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE, + $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH ) +* +* ==== Multiply by U22 ==== +* + CALL SGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE, + $ U( J2+1, I2+1 ), LDU, + $ H( INCOL+1+J2, JCOL ), LDH, ONE, + $ WH( I2+1, 1 ), LDWH ) +* +* ==== Copy it back ==== +* + CALL SLACPY( 'ALL', KDU, JLEN, WH, LDWH, + $ H( INCOL+1, JCOL ), LDH ) + 190 CONTINUE +* +* ==== Vertical multiply ==== +* + DO 200 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV + JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW ) +* +* ==== Copy right of H to scratch (the first KZS +* . 
columns get multiplied by zero) ==== +* + CALL SLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ), + $ LDH, WV( 1, 1+KZS ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL SLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV ) + CALL STRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, + $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), + $ LDWV ) +* +* ==== Multiply by U11 ==== +* + CALL SGEMM( 'N', 'N', JLEN, I2, J2, ONE, + $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV, + $ LDWV ) +* +* ==== Copy left of H to right of scratch ==== +* + CALL SLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH, + $ WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL STRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, + $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U22 ==== +* + CALL SGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, + $ H( JROW, INCOL+1+J2 ), LDH, + $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ), + $ LDWV ) +* +* ==== Copy it back ==== +* + CALL SLACPY( 'ALL', JLEN, KDU, WV, LDWV, + $ H( JROW, INCOL+1 ), LDH ) + 200 CONTINUE +* +* ==== Multiply Z (also vertical) ==== +* + IF( WANTZ ) THEN + DO 210 JROW = ILOZ, IHIZ, NV + JLEN = MIN( NV, IHIZ-JROW+1 ) +* +* ==== Copy right of Z to left of scratch (first +* . 
KZS columns get multiplied by zero) ==== +* + CALL SLACPY( 'ALL', JLEN, KNZ, + $ Z( JROW, INCOL+1+J2 ), LDZ, + $ WV( 1, 1+KZS ), LDWV ) +* +* ==== Multiply by U12 ==== +* + CALL SLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, + $ LDWV ) + CALL STRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, + $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), + $ LDWV ) +* +* ==== Multiply by U11 ==== +* + CALL SGEMM( 'N', 'N', JLEN, I2, J2, ONE, + $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE, + $ WV, LDWV ) +* +* ==== Copy left of Z to right of scratch ==== +* + CALL SLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ), + $ LDZ, WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL STRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, + $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), + $ LDWV ) +* +* ==== Multiply by U22 ==== +* + CALL SGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, + $ Z( JROW, INCOL+1+J2 ), LDZ, + $ U( J2+1, I2+1 ), LDU, ONE, + $ WV( 1, 1+I2 ), LDWV ) +* +* ==== Copy the result back to Z ==== +* + CALL SLACPY( 'ALL', JLEN, KDU, WV, LDWV, + $ Z( JROW, INCOL+1 ), LDZ ) + 210 CONTINUE + END IF + END IF + END IF + 220 CONTINUE +* +* ==== End of SLAQR5 ==== +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slarf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slarf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,115 @@ + SUBROUTINE SLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLARF applies a real elementary reflector H to a real m by n matrix +* C, from either the left or the right. H is represented in the form +* +* H = I - tau * v * v' +* +* where tau is a real scalar and v is a real vector. +* +* If tau = 0, then H is taken to be the unit matrix. 
+* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) REAL array, dimension +* (1 + (M-1)*abs(INCV)) if SIDE = 'L' +* or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +* The vector v in the representation of H. V is not used if +* TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0. +* +* TAU (input) REAL +* The value tau in the representation of H. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. 
+* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w := C' * v +* + CALL SGEMV( 'Transpose', M, N, ONE, C, LDC, V, INCV, ZERO, + $ WORK, 1 ) +* +* C := C - v * w' +* + CALL SGER( M, N, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w := C * v +* + CALL SGEMV( 'No transpose', M, N, ONE, C, LDC, V, INCV, + $ ZERO, WORK, 1 ) +* +* C := C - w * v' +* + CALL SGER( M, N, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of SLARF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slarfb.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slarfb.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,587 @@ + SUBROUTINE SLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + REAL C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* SLARFB applies a real block reflector H or its transpose H' to a +* real m by n matrix C, from either the left or the right. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply H or H' from the Left +* = 'R': apply H or H' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply H (No transpose) +* = 'T': apply H' (Transpose) +* +* DIRECT (input) CHARACTER*1 +* Indicates how H is formed from a product of elementary +* reflectors +* = 'F': H = H(1) H(2) . . . H(k) (Forward) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Indicates how the vectors which define the elementary +* reflectors are stored: +* = 'C': Columnwise +* = 'R': Rowwise +* +* M (input) INTEGER +* The number of rows of the matrix C. 
+* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* K (input) INTEGER +* The order of the matrix T (= the number of elementary +* reflectors whose product defines the block reflector). +* +* V (input) REAL array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,M) if STOREV = 'R' and SIDE = 'L' +* (LDV,N) if STOREV = 'R' and SIDE = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +* if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +* if STOREV = 'R', LDV >= K. +* +* T (input) REAL array, dimension (LDT,K) +* The triangular k by k matrix T in the representation of the +* block reflector. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDA >= max(1,M). +* +* WORK (workspace) REAL array, dimension (LDWORK,K) +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* If SIDE = 'L', LDWORK >= max(1,N); +* if SIDE = 'R', LDWORK >= max(1,M). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, STRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. 
+* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) +* +* W := C1' +* + DO 10 J = 1, K + CALL SCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2'*V2 +* + CALL SGEMM( 'Transpose', 'No transpose', N, K, M-K, + $ ONE, C( K+1, 1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL STRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W' +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2 * W' +* + CALL SGEMM( 'No transpose', 'Transpose', M-K, N, K, + $ -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1' +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K, + $ ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W' +* + DO 30 J = 1, K + DO 20 I = 1, N + C( J, I ) = C( J, I ) - WORK( I, J ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL SCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL SGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL STRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V' +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2' +* + CALL SGEMM( 'No transpose', 'Transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1' +* + CALL STRMM( 
'Right', 'Lower', 'Transpose', 'Unit', M, K, + $ ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) +* +* W := C2' +* + DO 70 J = 1, K + CALL SCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1'*V1 +* + CALL SGEMM( 'Transpose', 'No transpose', N, K, M-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL STRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W' +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1 * W' +* + CALL SGEMM( 'No transpose', 'Transpose', M-K, N, K, + $ -ONE, V, LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2' +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K, + $ ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W' +* + DO 90 J = 1, K + DO 80 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J ) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL SCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL SGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL STRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, 
K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V' +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1' +* + CALL SGEMM( 'No transpose', 'Transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2' +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K, + $ ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) +* +* W := C1' +* + DO 130 J = 1, K + CALL SCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1' +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K, + $ ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2'*V2' +* + CALL SGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE, + $ C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE, + $ WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL STRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V' * W' +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2' * W' +* + CALL SGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE, + $ V( 1, K+1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W' +* + DO 150 J = 1, K + DO 140 I = 1, N + C( J, I ) = C( J, I ) - WORK( I, J ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL SCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 
CONTINUE +* +* W := W * V1' +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K, + $ ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2' +* + CALL SGEMM( 'No transpose', 'Transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( 1, K+1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL STRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL SGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) +* +* W := C2' +* + DO 190 J = 1, K + CALL SCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2' +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K, + $ ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1'*V1' +* + CALL SGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE, + $ C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL STRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V' * W' +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1' * W' +* + CALL SGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE, + $ V, LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W' +* + DO 210 J = 1, K + DO 200 I = 1, N + C( 
M-K+J, I ) = C( M-K+J, I ) - WORK( I, J ) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL SCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2' +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K, + $ ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1' +* + CALL SGEMM( 'No transpose', 'Transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL STRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL SGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 240 J = 1, K + DO 230 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of SLARFB +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slarfg.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slarfg.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,137 @@ + SUBROUTINE SLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL ALPHA, TAU +* .. +* .. Array Arguments .. + REAL X( * ) +* .. +* +* Purpose +* ======= +* +* SLARFG generates a real elementary reflector H of order n, such +* that +* +* H * ( alpha ) = ( beta ), H' * H = I. +* ( x ) ( 0 ) +* +* where alpha and beta are scalars, and x is an (n-1)-element real +* vector. 
H is represented in the form +* +* H = I - tau * ( 1 ) * ( 1 v' ) , +* ( v ) +* +* where tau is a real scalar and v is a real (n-1)-element +* vector. +* +* If the elements of x are all zero, then tau = 0 and H is taken to be +* the unit matrix. +* +* Otherwise 1 <= tau <= 2. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the elementary reflector. +* +* ALPHA (input/output) REAL +* On entry, the value alpha. +* On exit, it is overwritten with the value beta. +* +* X (input/output) REAL array, dimension +* (1+(N-2)*abs(INCX)) +* On entry, the vector x. +* On exit, it is overwritten with the vector v. +* +* INCX (input) INTEGER +* The increment between elements of X. INCX > 0. +* +* TAU (output) REAL +* The value tau. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + REAL BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2, SNRM2 + EXTERNAL SLAMCH, SLAPY2, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN +* .. +* .. External Subroutines .. + EXTERNAL SSCAL +* .. +* .. Executable Statements .. 
+* + IF( N.LE.1 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = SNRM2( N-1, X, INCX ) +* + IF( XNORM.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) + SAFMIN = SLAMCH( 'S' ) / SLAMCH( 'E' ) + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + RSAFMN = ONE / SAFMIN + KNT = 0 + 10 CONTINUE + KNT = KNT + 1 + CALL SSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHA = ALPHA*RSAFMN + IF( ABS( BETA ).LT.SAFMIN ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = SNRM2( N-1, X, INCX ) + BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) + TAU = ( BETA-ALPHA ) / BETA + CALL SSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + ALPHA = BETA + DO 20 J = 1, KNT + ALPHA = ALPHA*SAFMIN + 20 CONTINUE + ELSE + TAU = ( BETA-ALPHA ) / BETA + CALL SSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) + ALPHA = BETA + END IF + END IF +* + RETURN +* +* End of SLARFG +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slarft.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slarft.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,217 @@ + SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* SLARFT forms the triangular factor T of a real block reflector H +* of order n, which is defined as a product of k elementary reflectors. +* +* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +* +* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. 
+* +* If STOREV = 'C', the vector which defines the elementary reflector +* H(i) is stored in the i-th column of the array V, and +* +* H = I - V * T * V' +* +* If STOREV = 'R', the vector which defines the elementary reflector +* H(i) is stored in the i-th row of the array V, and +* +* H = I - V' * T * V +* +* Arguments +* ========= +* +* DIRECT (input) CHARACTER*1 +* Specifies the order in which the elementary reflectors are +* multiplied to form the block reflector: +* = 'F': H = H(1) H(2) . . . H(k) (Forward) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Specifies how the vectors which define the elementary +* reflectors are stored (see also Further Details): +* = 'C': columnwise +* = 'R': rowwise +* +* N (input) INTEGER +* The order of the block reflector H. N >= 0. +* +* K (input) INTEGER +* The order of the triangular factor T (= the number of +* elementary reflectors). K >= 1. +* +* V (input/output) REAL array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,N) if STOREV = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i). +* +* T (output) REAL array, dimension (LDT,K) +* The k by k triangular factor T of the block reflector. +* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +* lower triangular. The rest of the array is not used. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* Further Details +* =============== +* +* The shape of the matrix V and the storage of the vectors which define +* the H(i) is best illustrated by the following example with n = 5 and +* k = 3. The elements equal to 1 are not stored; the corresponding +* array elements are modified but restored on exit. The rest of the +* array is not used. 
+* +* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +* +* V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +* ( v1 1 ) ( 1 v2 v2 v2 ) +* ( v1 v2 1 ) ( 1 v3 v3 ) +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* +* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +* +* V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +* ( v1 v2 v3 ) ( v2 v2 v2 1 ) +* ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +* ( 1 v3 ) +* ( 1 ) +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL VII +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, STRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 I = 1, K + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = 1, I + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + VII = V( I, I ) + V( I, I ) = ONE + IF( LSAME( STOREV, 'C' ) ) THEN +* +* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) +* + CALL SGEMV( 'Transpose', N-I+1, I-1, -TAU( I ), + $ V( I, 1 ), LDV, V( I, I ), 1, ZERO, + $ T( 1, I ), 1 ) + ELSE +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' +* + CALL SGEMV( 'No transpose', I-1, N-I+1, -TAU( I ), + $ V( 1, I ), LDV, V( I, I ), LDV, ZERO, + $ T( 1, I ), 1 ) + END IF + V( I, I ) = VII +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + ELSE + DO 40 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 30 J = I, K + T( J, I ) = ZERO + 30 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN + VII = V( N-K+I, I ) + V( N-K+I, I ) = ONE +* +* T(i+1:k,i) := +* - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) +* + CALL SGEMV( 'Transpose', N-K+I, K-I, -TAU( I 
), + $ V( 1, I+1 ), LDV, V( 1, I ), 1, ZERO, + $ T( I+1, I ), 1 ) + V( N-K+I, I ) = VII + ELSE + VII = V( I, N-K+I ) + V( I, N-K+I ) = ONE +* +* T(i+1:k,i) := +* - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' +* + CALL SGEMV( 'No transpose', K-I, N-K+I, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) + V( I, N-K+I ) = VII + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL STRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 40 CONTINUE + END IF + RETURN +* +* End of SLARFT +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slarfx.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slarfx.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,637 @@ + SUBROUTINE SLARFX( SIDE, M, N, V, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER LDC, M, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLARFX applies a real elementary reflector H to a real m by n +* matrix C, from either the left or the right. H is represented in the +* form +* +* H = I - tau * v * v' +* +* where tau is a real scalar and v is a real vector. +* +* If tau = 0, then H is taken to be the unit matrix +* +* This version uses inline code if H has order < 11. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) REAL array, dimension (M) if SIDE = 'L' +* or (N) if SIDE = 'R' +* The vector v in the representation of H. +* +* TAU (input) REAL +* The value tau in the representation of H. 
+* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDA >= (1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* WORK is not referenced if H has order < 11. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER J + REAL SUM, T1, T10, T2, T3, T4, T5, T6, T7, T8, T9, + $ V1, V10, V2, V3, V4, V5, V6, V7, V8, V9 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C, where H has order m. +* + GO TO ( 10, 30, 50, 70, 90, 110, 130, 150, + $ 170, 190 )M +* +* Code for general M +* +* w := C'*v +* + CALL SGEMV( 'Transpose', M, N, ONE, C, LDC, V, 1, ZERO, WORK, + $ 1 ) +* +* C := C - tau * v * w' +* + CALL SGER( M, N, -TAU, V, 1, WORK, 1, C, LDC ) + GO TO 410 + 10 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*V( 1 ) + DO 20 J = 1, N + C( 1, J ) = T1*C( 1, J ) + 20 CONTINUE + GO TO 410 + 30 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + DO 40 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + 40 CONTINUE + GO TO 410 + 50 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + DO 60 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + 60 CONTINUE + GO TO 410 + 70 
CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + DO 80 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + 80 CONTINUE + GO TO 410 + 90 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + DO 100 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + 100 CONTINUE + GO TO 410 + 110 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + DO 120 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + 120 CONTINUE + GO TO 410 + 130 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + DO 140 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + 
C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + 140 CONTINUE + GO TO 410 + 150 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + DO 160 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + 160 CONTINUE + GO TO 410 + 170 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + DO 180 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + 180 CONTINUE + GO TO 410 + 190 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + V10 = V( 10 ) + T10 = TAU*V10 + 
DO 200 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + + $ V10*C( 10, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + C( 10, J ) = C( 10, J ) - SUM*T10 + 200 CONTINUE + GO TO 410 + ELSE +* +* Form C * H, where H has order n. +* + GO TO ( 210, 230, 250, 270, 290, 310, 330, 350, + $ 370, 390 )N +* +* Code for general N +* +* w := C * v +* + CALL SGEMV( 'No transpose', M, N, ONE, C, LDC, V, 1, ZERO, + $ WORK, 1 ) +* +* C := C - tau * w * v' +* + CALL SGER( M, N, -TAU, WORK, 1, V, 1, C, LDC ) + GO TO 410 + 210 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*V( 1 ) + DO 220 J = 1, M + C( J, 1 ) = T1*C( J, 1 ) + 220 CONTINUE + GO TO 410 + 230 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + DO 240 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + 240 CONTINUE + GO TO 410 + 250 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + DO 260 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + 260 CONTINUE + GO TO 410 + 270 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + DO 280 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( 
J, 4 ) = C( J, 4 ) - SUM*T4 + 280 CONTINUE + GO TO 410 + 290 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + DO 300 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + 300 CONTINUE + GO TO 410 + 310 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + DO 320 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + 320 CONTINUE + GO TO 410 + 330 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + DO 340 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + 340 CONTINUE + GO TO 410 + 350 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + 
V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + DO 360 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + 360 CONTINUE + GO TO 410 + 370 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + DO 380 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + 380 CONTINUE + GO TO 410 + 390 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + V10 = V( 10 ) + T10 = TAU*V10 + DO 400 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + + $ V10*C( J, 10 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + 
C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + C( J, 10 ) = C( J, 10 ) - SUM*T10 + 400 CONTINUE + GO TO 410 + END IF + 410 RETURN +* +* End of SLARFX +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slartg.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slartg.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,145 @@ + SUBROUTINE SLARTG( F, G, CS, SN, R ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + REAL CS, F, G, R, SN +* .. +* +* Purpose +* ======= +* +* SLARTG generate a plane rotation so that +* +* [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. +* [ -SN CS ] [ G ] [ 0 ] +* +* This is a slower, more accurate version of the BLAS1 routine SROTG, +* with the following other differences: +* F and G are unchanged on return. +* If G=0, then CS=1 and SN=0. +* If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any +* floating point operations (saves work in SBDSQR when +* there are zeros on the diagonal). +* +* If F exceeds G in magnitude, CS will be positive. +* +* Arguments +* ========= +* +* F (input) REAL +* The first component of vector to be rotated. +* +* G (input) REAL +* The second component of vector to be rotated. +* +* CS (output) REAL +* The cosine of the rotation. +* +* SN (output) REAL +* The sine of the rotation. +* +* R (output) REAL +* The nonzero component of the rotated vector. +* +* This version has a few statements commented out for thread safety +* (machine parameters are computed on each entry). 10 feb 03, SJH. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) +* .. +* .. Local Scalars .. 
+* LOGICAL FIRST + INTEGER COUNT, I + REAL EPS, F1, G1, SAFMIN, SAFMN2, SAFMX2, SCALE +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, SQRT +* .. +* .. Save statement .. +* SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. +* DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* +* IF( FIRST ) THEN + SAFMIN = SLAMCH( 'S' ) + EPS = SLAMCH( 'E' ) + SAFMN2 = SLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( SLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 +* FIRST = .FALSE. +* END IF + IF( G.EQ.ZERO ) THEN + CS = ONE + SN = ZERO + R = F + ELSE IF( F.EQ.ZERO ) THEN + CS = ZERO + SN = ONE + R = G + ELSE + F1 = F + G1 = G + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) THEN + COUNT = 0 + 10 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMN2 + G1 = G1*SAFMN2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 20 I = 1, COUNT + R = R*SAFMX2 + 20 CONTINUE + ELSE IF( SCALE.LE.SAFMN2 ) THEN + COUNT = 0 + 30 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMX2 + G1 = G1*SAFMX2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.LE.SAFMN2 ) + $ GO TO 30 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 40 I = 1, COUNT + R = R*SAFMN2 + 40 CONTINUE + ELSE + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + END IF + IF( ABS( F ).GT.ABS( G ) .AND. CS.LT.ZERO ) THEN + CS = -CS + SN = -SN + R = -R + END IF + END IF + RETURN +* +* End of SLARTG +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slarz.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slarz.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,152 @@ + SUBROUTINE SLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. 
+ CHARACTER SIDE + INTEGER INCV, L, LDC, M, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLARZ applies a real elementary reflector H to a real M-by-N +* matrix C, from either the left or the right. H is represented in the +* form +* +* H = I - tau * v * v' +* +* where tau is a real scalar and v is a real vector. +* +* If tau = 0, then H is taken to be the unit matrix. +* +* +* H is a product of k elementary reflectors as returned by STZRZF. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* L (input) INTEGER +* The number of entries of the vector V containing +* the meaningful part of the Householder vectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* V (input) REAL array, dimension (1+(L-1)*abs(INCV)) +* The vector v in the representation of H as returned by +* STZRZF. V is not used if TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0. +* +* TAU (input) REAL +* The value tau in the representation of H. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SGER +* .. +* .. External Functions .. 
+ LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:n ) = C( 1, 1:n ) +* + CALL SCOPY( N, C, LDC, WORK, 1 ) +* +* w( 1:n ) = w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) +* + CALL SGEMV( 'Transpose', L, N, ONE, C( M-L+1, 1 ), LDC, V, + $ INCV, ONE, WORK, 1 ) +* +* C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) +* + CALL SAXPY( N, -TAU, WORK, 1, C, LDC ) +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* tau * v( 1:l ) * w( 1:n )' +* + CALL SGER( L, N, -TAU, V, INCV, WORK, 1, C( M-L+1, 1 ), + $ LDC ) + END IF +* + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:m ) = C( 1:m, 1 ) +* + CALL SCOPY( M, C, 1, WORK, 1 ) +* +* w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) +* + CALL SGEMV( 'No transpose', M, L, ONE, C( 1, N-L+1 ), LDC, + $ V, INCV, ONE, WORK, 1 ) +* +* C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) +* + CALL SAXPY( M, -TAU, WORK, 1, C, 1 ) +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* tau * w( 1:m ) * v( 1:l )' +* + CALL SGER( M, L, -TAU, WORK, 1, V, INCV, C( 1, N-L+1 ), + $ LDC ) +* + END IF +* + END IF +* + RETURN +* +* End of SLARZ +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slarzb.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slarzb.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,220 @@ + SUBROUTINE SLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, + $ LDV, T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, L, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + REAL C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* SLARZB applies a real block reflector H or its transpose H**T to +* a real distributed M-by-N C from the left or the right. 
+* +* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply H or H' from the Left +* = 'R': apply H or H' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply H (No transpose) +* = 'C': apply H' (Transpose) +* +* DIRECT (input) CHARACTER*1 +* Indicates how H is formed from a product of elementary +* reflectors +* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Indicates how the vectors which define the elementary +* reflectors are stored: +* = 'C': Columnwise (not supported yet) +* = 'R': Rowwise +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* K (input) INTEGER +* The order of the matrix T (= the number of elementary +* reflectors whose product defines the block reflector). +* +* L (input) INTEGER +* The number of columns of the matrix V containing the +* meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* V (input) REAL array, dimension (LDV,NV). +* If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. +* +* T (input) REAL array, dimension (LDT,K) +* The triangular K-by-K matrix T in the representation of the +* block reflector. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension (LDWORK,K) +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. 
+* If SIDE = 'L', LDWORK >= max(1,N); +* if SIDE = 'R', LDWORK >= max(1,M). +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, INFO, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, STRMM, XERBLA +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLARZB', -INFO ) + RETURN + END IF +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C +* +* W( 1:n, 1:k ) = C( 1:k, 1:n )' +* + DO 10 J = 1, K + CALL SCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... +* C( m-l+1:m, 1:n )' * V( 1:k, 1:l )' +* + IF( L.GT.0 ) + $ CALL SGEMM( 'Transpose', 'Transpose', N, K, L, ONE, + $ C( M-L+1, 1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) * T' or W( 1:m, 1:k ) * T +* + CALL STRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:k, 1:n ) = C( 1:k, 1:n ) - W( 1:n, 1:k )' +* + DO 30 J = 1, N + DO 20 I = 1, K + C( I, J ) = C( I, J ) - WORK( J, I ) + 20 CONTINUE + 30 CONTINUE +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... 
+* V( 1:k, 1:l )' * W( 1:n, 1:k )' +* + IF( L.GT.0 ) + $ CALL SGEMM( 'Transpose', 'Transpose', L, N, K, -ONE, V, LDV, + $ WORK, LDWORK, ONE, C( M-L+1, 1 ), LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' +* +* W( 1:m, 1:k ) = C( 1:m, 1:k ) +* + DO 40 J = 1, K + CALL SCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... +* C( 1:m, n-l+1:n ) * V( 1:k, 1:l )' +* + IF( L.GT.0 ) + $ CALL SGEMM( 'No transpose', 'Transpose', M, K, L, ONE, + $ C( 1, N-L+1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) * T or W( 1:m, 1:k ) * T' +* + CALL STRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* W( 1:m, 1:k ) * V( 1:k, 1:l ) +* + IF( L.GT.0 ) + $ CALL SGEMM( 'No transpose', 'No transpose', M, L, K, -ONE, + $ WORK, LDWORK, V, LDV, ONE, C( 1, N-L+1 ), LDC ) +* + END IF +* + RETURN +* +* End of SLARZB +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slarzt.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slarzt.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,184 @@ + SUBROUTINE SLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* SLARZT forms the triangular factor T of a real block reflector +* H of order > n, which is defined as a product of k elementary +* reflectors. +* +* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +* +* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. 
+* +* If STOREV = 'C', the vector which defines the elementary reflector +* H(i) is stored in the i-th column of the array V, and +* +* H = I - V * T * V' +* +* If STOREV = 'R', the vector which defines the elementary reflector +* H(i) is stored in the i-th row of the array V, and +* +* H = I - V' * T * V +* +* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +* +* Arguments +* ========= +* +* DIRECT (input) CHARACTER*1 +* Specifies the order in which the elementary reflectors are +* multiplied to form the block reflector: +* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Specifies how the vectors which define the elementary +* reflectors are stored (see also Further Details): +* = 'C': columnwise (not supported yet) +* = 'R': rowwise +* +* N (input) INTEGER +* The order of the block reflector H. N >= 0. +* +* K (input) INTEGER +* The order of the triangular factor T (= the number of +* elementary reflectors). K >= 1. +* +* V (input/output) REAL array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,N) if STOREV = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i). +* +* T (output) REAL array, dimension (LDT,K) +* The k by k triangular factor T of the block reflector. +* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +* lower triangular. The rest of the array is not used. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. 
of Tenn., Knoxville, USA +* +* The shape of the matrix V and the storage of the vectors which define +* the H(i) is best illustrated by the following example with n = 5 and +* k = 3. The elements equal to 1 are not stored; the corresponding +* array elements are modified but restored on exit. The rest of the +* array is not used. +* +* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +* +* ______V_____ +* ( v1 v2 v3 ) / \ +* ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) +* V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) +* ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) +* ( v1 v2 v3 ) +* . . . +* . . . +* 1 . . +* 1 . +* 1 +* +* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +* +* ______V_____ +* 1 / \ +* . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) +* . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) +* . . . ( . . 1 . . v3 v3 v3 v3 v3 ) +* . . . +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* V = ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, STRMV, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. 
+* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLARZT', -INFO ) + RETURN + END IF +* + DO 20 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = I, K + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN +* +* T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)' +* + CALL SGEMV( 'No transpose', K-I, N, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) +* +* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL STRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + RETURN +* +* End of SLARZT +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slas2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slas2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,121 @@ + SUBROUTINE SLAS2( F, G, H, SSMIN, SSMAX ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + REAL F, G, H, SSMAX, SSMIN +* .. +* +* Purpose +* ======= +* +* SLAS2 computes the singular values of the 2-by-2 matrix +* [ F G ] +* [ 0 H ]. +* On return, SSMIN is the smaller singular value and SSMAX is the +* larger singular value. +* +* Arguments +* ========= +* +* F (input) REAL +* The (1,1) element of the 2-by-2 matrix. +* +* G (input) REAL +* The (1,2) element of the 2-by-2 matrix. +* +* H (input) REAL +* The (2,2) element of the 2-by-2 matrix. +* +* SSMIN (output) REAL +* The smaller singular value. +* +* SSMAX (output) REAL +* The larger singular value. 
+* +* Further Details +* =============== +* +* Barring over/underflow, all output quantities are correct to within +* a few units in the last place (ulps), even in the absence of a guard +* digit in addition/subtraction. +* +* In IEEE arithmetic, the code works correctly if one matrix element is +* infinite. +* +* Overflow will not occur unless the largest singular value itself +* overflows, or is within a few ulps of overflow. (On machines with +* partial overflow, like the Cray, overflow may occur if the largest +* singular value is within a factor of 2 of overflow.) +* +* Underflow is harmless if underflow is gradual. Otherwise, results +* may correspond to a matrix modified by perturbations of size near +* the underflow threshold. +* +* ==================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + REAL AS, AT, AU, C, FA, FHMN, FHMX, GA, HA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. 
+* + FA = ABS( F ) + GA = ABS( G ) + HA = ABS( H ) + FHMN = MIN( FA, HA ) + FHMX = MAX( FA, HA ) + IF( FHMN.EQ.ZERO ) THEN + SSMIN = ZERO + IF( FHMX.EQ.ZERO ) THEN + SSMAX = GA + ELSE + SSMAX = MAX( FHMX, GA )*SQRT( ONE+ + $ ( MIN( FHMX, GA ) / MAX( FHMX, GA ) )**2 ) + END IF + ELSE + IF( GA.LT.FHMX ) THEN + AS = ONE + FHMN / FHMX + AT = ( FHMX-FHMN ) / FHMX + AU = ( GA / FHMX )**2 + C = TWO / ( SQRT( AS*AS+AU )+SQRT( AT*AT+AU ) ) + SSMIN = FHMN*C + SSMAX = FHMX / C + ELSE + AU = FHMX / GA + IF( AU.EQ.ZERO ) THEN +* +* Avoid possible harmful underflow if exponent range +* asymmetric (true SSMIN may not underflow even if +* AU underflows) +* + SSMIN = ( FHMN*FHMX ) / GA + SSMAX = GA + ELSE + AS = ONE + FHMN / FHMX + AT = ( FHMX-FHMN ) / FHMX + C = ONE / ( SQRT( ONE+( AS*AU )**2 )+ + $ SQRT( ONE+( AT*AU )**2 ) ) + SSMIN = ( FHMN*C )*AU + SSMIN = SSMIN + SSMIN + SSMAX = GA / ( C+C ) + END IF + END IF + END IF + RETURN +* +* End of SLAS2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slascl.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slascl.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,267 @@ + SUBROUTINE SLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER TYPE + INTEGER INFO, KL, KU, LDA, M, N + REAL CFROM, CTO +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SLASCL multiplies the M by N real matrix A by the real scalar +* CTO/CFROM. This is done without over/underflow as long as the final +* result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that +* A may be full, upper triangular, lower triangular, upper Hessenberg, +* or banded. +* +* Arguments +* ========= +* +* TYPE (input) CHARACTER*1 +* TYPE indices the storage type of the input matrix. +* = 'G': A is a full matrix. 
+* = 'L': A is a lower triangular matrix. +* = 'U': A is an upper triangular matrix. +* = 'H': A is an upper Hessenberg matrix. +* = 'B': A is a symmetric band matrix with lower bandwidth KL +* and upper bandwidth KU and with the only the lower +* half stored. +* = 'Q': A is a symmetric band matrix with lower bandwidth KL +* and upper bandwidth KU and with the only the upper +* half stored. +* = 'Z': A is a band matrix with lower bandwidth KL and upper +* bandwidth KU. +* +* KL (input) INTEGER +* The lower bandwidth of A. Referenced only if TYPE = 'B', +* 'Q' or 'Z'. +* +* KU (input) INTEGER +* The upper bandwidth of A. Referenced only if TYPE = 'B', +* 'Q' or 'Z'. +* +* CFROM (input) REAL +* CTO (input) REAL +* The matrix A is multiplied by CTO/CFROM. A(I,J) is computed +* without over/underflow if the final result CTO*A(I,J)/CFROM +* can be represented without over/underflow. CFROM must be +* nonzero. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* The matrix to be multiplied by CTO/CFROM. See TYPE for the +* storage type. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* INFO (output) INTEGER +* 0 - successful exit +* <0 - if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER I, ITYPE, J, K1, K2, K3, K4 + REAL BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 +* + IF( LSAME( TYPE, 'G' ) ) THEN + ITYPE = 0 + ELSE IF( LSAME( TYPE, 'L' ) ) THEN + ITYPE = 1 + ELSE IF( LSAME( TYPE, 'U' ) ) THEN + ITYPE = 2 + ELSE IF( LSAME( TYPE, 'H' ) ) THEN + ITYPE = 3 + ELSE IF( LSAME( TYPE, 'B' ) ) THEN + ITYPE = 4 + ELSE IF( LSAME( TYPE, 'Q' ) ) THEN + ITYPE = 5 + ELSE IF( LSAME( TYPE, 'Z' ) ) THEN + ITYPE = 6 + ELSE + ITYPE = -1 + END IF +* + IF( ITYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( CFROM.EQ.ZERO ) THEN + INFO = -4 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR. + $ ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN + INFO = -7 + ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN + INFO = -9 + ELSE IF( ITYPE.GE.4 ) THEN + IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN + INFO = -2 + ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR. + $ ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) ) + $ THEN + INFO = -3 + ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR. + $ ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR. + $ ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN + INFO = -9 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASCL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* + CFROMC = CFROM + CTOC = CTO +* + 10 CONTINUE + CFROM1 = CFROMC*SMLNUM + CTO1 = CTOC / BIGNUM + IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN + MUL = SMLNUM + DONE = .FALSE. + CFROMC = CFROM1 + ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN + MUL = BIGNUM + DONE = .FALSE. + CTOC = CTO1 + ELSE + MUL = CTOC / CFROMC + DONE = .TRUE. 
+ END IF +* + IF( ITYPE.EQ.0 ) THEN +* +* Full matrix +* + DO 30 J = 1, N + DO 20 I = 1, M + A( I, J ) = A( I, J )*MUL + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( ITYPE.EQ.1 ) THEN +* +* Lower triangular matrix +* + DO 50 J = 1, N + DO 40 I = J, M + A( I, J ) = A( I, J )*MUL + 40 CONTINUE + 50 CONTINUE +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Upper triangular matrix +* + DO 70 J = 1, N + DO 60 I = 1, MIN( J, M ) + A( I, J ) = A( I, J )*MUL + 60 CONTINUE + 70 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* Upper Hessenberg matrix +* + DO 90 J = 1, N + DO 80 I = 1, MIN( J+1, M ) + A( I, J ) = A( I, J )*MUL + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Lower half of a symmetric band matrix +* + K3 = KL + 1 + K4 = N + 1 + DO 110 J = 1, N + DO 100 I = 1, MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 100 CONTINUE + 110 CONTINUE +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Upper half of a symmetric band matrix +* + K1 = KU + 2 + K3 = KU + 1 + DO 130 J = 1, N + DO 120 I = MAX( K1-J, 1 ), K3 + A( I, J ) = A( I, J )*MUL + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( ITYPE.EQ.6 ) THEN +* +* Band matrix +* + K1 = KL + KU + 2 + K2 = KL + 1 + K3 = 2*KL + KU + 1 + K4 = KL + KU + 1 + M + DO 150 J = 1, N + DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 140 CONTINUE + 150 CONTINUE +* + END IF +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of SLASCL +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasd0.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasd0.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,228 @@ + SUBROUTINE SLASD0( N, SQRE, D, E, U, LDU, VT, LDVT, SMLSIZ, IWORK, + $ WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDU, LDVT, N, SMLSIZ, SQRE +* .. +* .. Array Arguments .. 
+ INTEGER IWORK( * ) + REAL D( * ), E( * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* Using a divide and conquer approach, SLASD0 computes the singular +* value decomposition (SVD) of a real upper bidiagonal N-by-M +* matrix B with diagonal D and offdiagonal E, where M = N + SQRE. +* The algorithm computes orthogonal matrices U and VT such that +* B = U * S * VT. The singular values S are overwritten on D. +* +* A related subroutine, SLASDA, computes only the singular values, +* and optionally, the singular vectors in compact form. +* +* Arguments +* ========= +* +* N (input) INTEGER +* On entry, the row dimension of the upper bidiagonal matrix. +* This is also the dimension of the main diagonal array D. +* +* SQRE (input) INTEGER +* Specifies the column dimension of the bidiagonal matrix. +* = 0: The bidiagonal matrix has column dimension M = N; +* = 1: The bidiagonal matrix has column dimension M = N+1; +* +* D (input/output) REAL array, dimension (N) +* On entry D contains the main diagonal of the bidiagonal +* matrix. +* On exit D, if INFO = 0, contains its singular values. +* +* E (input) REAL array, dimension (M-1) +* Contains the subdiagonal entries of the bidiagonal matrix. +* On exit, E has been destroyed. +* +* U (output) REAL array, dimension at least (LDQ, N) +* On exit, U contains the left singular vectors. +* +* LDU (input) INTEGER +* On entry, leading dimension of U. +* +* VT (output) REAL array, dimension at least (LDVT, M) +* On exit, VT' contains the right singular vectors. +* +* LDVT (input) INTEGER +* On entry, leading dimension of VT. +* +* SMLSIZ (input) INTEGER +* On entry, maximum size of the subproblems at the +* bottom of the computation tree. +* +* IWORK (workspace) INTEGER array, dimension (8*N) +* +* WORK (workspace) REAL array, dimension (3*M**2+2*M) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. 
+* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, IC, IDXQ, IDXQC, IM1, INODE, ITEMP, IWK, + $ J, LF, LL, LVL, M, NCC, ND, NDB1, NDIML, NDIMR, + $ NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQREI + REAL ALPHA, BETA +* .. +* .. External Subroutines .. + EXTERNAL SLASD1, SLASDQ, SLASDT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -2 + END IF +* + M = N + SQRE +* + IF( LDU.LT.N ) THEN + INFO = -6 + ELSE IF( LDVT.LT.M ) THEN + INFO = -8 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD0', -INFO ) + RETURN + END IF +* +* If the input matrix is too small, call SLASDQ to find the SVD. +* + IF( N.LE.SMLSIZ ) THEN + CALL SLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDVT, U, LDU, U, + $ LDU, WORK, INFO ) + RETURN + END IF +* +* Set up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N + IDXQ = NDIMR + N + IWK = IDXQ + N + CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* For the nodes on bottom level of the tree, solve +* their subproblems by SLASDQ. 
+* + NDB1 = ( ND+1 ) / 2 + NCC = 0 + DO 30 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NLP1 = NL + 1 + NR = IWORK( NDIMR+I1 ) + NRP1 = NR + 1 + NLF = IC - NL + NRF = IC + 1 + SQREI = 1 + CALL SLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ), E( NLF ), + $ VT( NLF, NLF ), LDVT, U( NLF, NLF ), LDU, + $ U( NLF, NLF ), LDU, WORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + ITEMP = IDXQ + NLF - 2 + DO 10 J = 1, NL + IWORK( ITEMP+J ) = J + 10 CONTINUE + IF( I.EQ.ND ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + NRP1 = NR + SQREI + CALL SLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ), E( NRF ), + $ VT( NRF, NRF ), LDVT, U( NRF, NRF ), LDU, + $ U( NRF, NRF ), LDU, WORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + ITEMP = IDXQ + IC + DO 20 J = 1, NR + IWORK( ITEMP+J-1 ) = J + 20 CONTINUE + 30 CONTINUE +* +* Now conquer each subproblem bottom-up. +* + DO 50 LVL = NLVL, 1, -1 +* +* Find the first node LF and last node LL on the +* current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 40 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + IF( ( SQRE.EQ.0 ) .AND. 
( I.EQ.LL ) ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + IDXQC = IDXQ + NLF - 1 + ALPHA = D( IC ) + BETA = E( IC ) + CALL SLASD1( NL, NR, SQREI, D( NLF ), ALPHA, BETA, + $ U( NLF, NLF ), LDU, VT( NLF, NLF ), LDVT, + $ IWORK( IDXQC ), IWORK( IWK ), WORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of SLASD0 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasd1.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasd1.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,232 @@ + SUBROUTINE SLASD1( NL, NR, SQRE, D, ALPHA, BETA, U, LDU, VT, LDVT, + $ IDXQ, IWORK, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDU, LDVT, NL, NR, SQRE + REAL ALPHA, BETA +* .. +* .. Array Arguments .. + INTEGER IDXQ( * ), IWORK( * ) + REAL D( * ), U( LDU, * ), VT( LDVT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, +* where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0. +* +* A related subroutine SLASD7 handles the case in which the singular +* values (and the singular vectors in factored form) are desired. +* +* SLASD1 computes the SVD as follows: +* +* ( D1(in) 0 0 0 ) +* B = U(in) * ( Z1' a Z2' b ) * VT(in) +* ( 0 0 D2(in) 0 ) +* +* = U(out) * ( D(out) 0) * VT(out) +* +* where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M +* with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros +* elsewhere; and the entry b is empty if SQRE = 0. +* +* The left singular vectors of the original matrix are stored in U, and +* the transpose of the right singular vectors are stored in VT, and the +* singular values are in D. 
The algorithm consists of three stages: +* +* The first stage consists of deflating the size of the problem +* when there are multiple singular values or when there are zeros in +* the Z vector. For each such occurrence the dimension of the +* secular equation problem is reduced by one. This stage is +* performed by the routine SLASD2. +* +* The second stage consists of calculating the updated +* singular values. This is done by finding the square roots of the +* roots of the secular equation via the routine SLASD4 (as called +* by SLASD3). This routine also calculates the singular vectors of +* the current problem. +* +* The final stage consists of computing the updated singular vectors +* directly using the updated singular values. The singular vectors +* for the current problem are multiplied with the singular vectors +* from the overall problem. +* +* Arguments +* ========= +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has row dimension N = NL + NR + 1, +* and column dimension M = N + SQRE. +* +* D (input/output) REAL array, dimension (NL+NR+1). +* N = NL+NR+1 +* On entry D(1:NL,1:NL) contains the singular values of the +* upper block; and D(NL+2:N) contains the singular values of +* the lower block. On exit D(1:N) contains the singular values +* of the modified matrix. +* +* ALPHA (input/output) REAL +* Contains the diagonal element associated with the added row. +* +* BETA (input/output) REAL +* Contains the off-diagonal element associated with the added +* row. +* +* U (input/output) REAL array, dimension (LDU,N) +* On entry U(1:NL, 1:NL) contains the left singular vectors of +* the upper block; U(NL+2:N, NL+2:N) contains the left singular +* vectors of the lower block. 
On exit U contains the left +* singular vectors of the bidiagonal matrix. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max( 1, N ). +* +* VT (input/output) REAL array, dimension (LDVT,M) +* where M = N + SQRE. +* On entry VT(1:NL+1, 1:NL+1)' contains the right singular +* vectors of the upper block; VT(NL+2:M, NL+2:M)' contains +* the right singular vectors of the lower block. On exit +* VT' contains the right singular vectors of the +* bidiagonal matrix. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= max( 1, M ). +* +* IDXQ (output) INTEGER array, dimension (N) +* This contains the permutation which will reintegrate the +* subproblem just solved back into sorted order, i.e. +* D( IDXQ( I = 1, N ) ) will be in ascending order. +* +* IWORK (workspace) INTEGER array, dimension (4*N) +* +* WORK (workspace) REAL array, dimension (3*M**2+2*M) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. +* + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER COLTYP, I, IDX, IDXC, IDXP, IQ, ISIGMA, IU2, + $ IVT2, IZ, K, LDQ, LDU2, LDVT2, M, N, N1, N2 + REAL ORGNRM +* .. +* .. External Subroutines .. + EXTERNAL SLAMRG, SLASCL, SLASD2, SLASD3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.LT.0 ) .OR. 
( SQRE.GT.1 ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD1', -INFO ) + RETURN + END IF +* + N = NL + NR + 1 + M = N + SQRE +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in SLASD2 and SLASD3. +* + LDU2 = N + LDVT2 = M +* + IZ = 1 + ISIGMA = IZ + M + IU2 = ISIGMA + N + IVT2 = IU2 + LDU2*N + IQ = IVT2 + LDVT2*M +* + IDX = 1 + IDXC = IDX + N + COLTYP = IDXC + N + IDXP = COLTYP + N +* +* Scale. +* + ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) ) + D( NL+1 ) = ZERO + DO 10 I = 1, N + IF( ABS( D( I ) ).GT.ORGNRM ) THEN + ORGNRM = ABS( D( I ) ) + END IF + 10 CONTINUE + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + ALPHA = ALPHA / ORGNRM + BETA = BETA / ORGNRM +* +* Deflate singular values. +* + CALL SLASD2( NL, NR, SQRE, K, D, WORK( IZ ), ALPHA, BETA, U, LDU, + $ VT, LDVT, WORK( ISIGMA ), WORK( IU2 ), LDU2, + $ WORK( IVT2 ), LDVT2, IWORK( IDXP ), IWORK( IDX ), + $ IWORK( IDXC ), IDXQ, IWORK( COLTYP ), INFO ) +* +* Solve Secular Equation and update singular vectors. +* + LDQ = K + CALL SLASD3( NL, NR, SQRE, K, D, WORK( IQ ), LDQ, WORK( ISIGMA ), + $ U, LDU, WORK( IU2 ), LDU2, VT, LDVT, WORK( IVT2 ), + $ LDVT2, IWORK( IDXC ), IWORK( COLTYP ), WORK( IZ ), + $ INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* Unscale. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) +* +* Prepare the IDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL SLAMRG( N1, N2, D, 1, -1, IDXQ ) +* + RETURN +* +* End of SLASD1 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasd2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasd2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,512 @@ + SUBROUTINE SLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT, + $ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX, + $ IDXC, IDXQ, COLTYP, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. 
of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE + REAL ALPHA, BETA +* .. +* .. Array Arguments .. + INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ), + $ IDXQ( * ) + REAL D( * ), DSIGMA( * ), U( LDU, * ), + $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), + $ Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASD2 merges the two sets of singular values together into a single +* sorted set. Then it tries to deflate the size of the problem. +* There are two ways in which deflation can occur: when two or more +* singular values are close together or if there is a tiny entry in the +* Z vector. For each such occurrence the order of the related secular +* equation problem is reduced by one. +* +* SLASD2 is called from SLASD1. +* +* Arguments +* ========= +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has N = NL + NR + 1 rows and +* M = N + SQRE >= N columns. +* +* K (output) INTEGER +* Contains the dimension of the non-deflated matrix, +* This is the order of the related secular equation. 1 <= K <=N. +* +* D (input/output) REAL array, dimension (N) +* On entry D contains the singular values of the two submatrices +* to be combined. On exit D contains the trailing (N-K) updated +* singular values (those which were deflated) sorted into +* increasing order. +* +* Z (output) REAL array, dimension (N) +* On exit Z contains the updating row vector in the secular +* equation. +* +* ALPHA (input) REAL +* Contains the diagonal element associated with the added row. +* +* BETA (input) REAL +* Contains the off-diagonal element associated with the added +* row. 
+* +* U (input/output) REAL array, dimension (LDU,N) +* On entry U contains the left singular vectors of two +* submatrices in the two square blocks with corners at (1,1), +* (NL, NL), and (NL+2, NL+2), (N,N). +* On exit U contains the trailing (N-K) updated left singular +* vectors (those which were deflated) in its last N-K columns. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= N. +* +* VT (input/output) REAL array, dimension (LDVT,M) +* On entry VT' contains the right singular vectors of two +* submatrices in the two square blocks with corners at (1,1), +* (NL+1, NL+1), and (NL+2, NL+2), (M,M). +* On exit VT' contains the trailing (N-K) updated right singular +* vectors (those which were deflated) in its last N-K columns. +* In case SQRE =1, the last row of VT spans the right null +* space. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= M. +* +* DSIGMA (output) REAL array, dimension (N) +* Contains a copy of the diagonal elements (K-1 singular values +* and one zero) in the secular equation. +* +* U2 (output) REAL array, dimension (LDU2,N) +* Contains a copy of the first K-1 left singular vectors which +* will be used by SLASD3 in a matrix multiply (SGEMM) to solve +* for the new left singular vectors. U2 is arranged into four +* blocks. The first block contains a column with 1 at NL+1 and +* zero everywhere else; the second block contains non-zero +* entries only at and above NL; the third contains non-zero +* entries only below NL+1; and the fourth is dense. +* +* LDU2 (input) INTEGER +* The leading dimension of the array U2. LDU2 >= N. +* +* VT2 (output) REAL array, dimension (LDVT2,N) +* VT2' contains a copy of the first K right singular vectors +* which will be used by SLASD3 in a matrix multiply (SGEMM) to +* solve for the new right singular vectors. VT2 is arranged into +* three blocks. 
The first block contains a row that corresponds +* to the special 0 diagonal element in SIGMA; the second block +* contains non-zeros only at and before NL +1; the third block +* contains non-zeros only at and after NL +2. +* +* LDVT2 (input) INTEGER +* The leading dimension of the array VT2. LDVT2 >= M. +* +* IDXP (workspace) INTEGER array, dimension (N) +* This will contain the permutation used to place deflated +* values of D at the end of the array. On output IDXP(2:K) +* points to the nondeflated D-values and IDXP(K+1:N) +* points to the deflated singular values. +* +* IDX (workspace) INTEGER array, dimension (N) +* This will contain the permutation used to sort the contents of +* D into ascending order. +* +* IDXC (output) INTEGER array, dimension (N) +* This will contain the permutation used to arrange the columns +* of the deflated U matrix into three groups: the first group +* contains non-zero entries only at and above NL, the second +* contains non-zero entries only below NL+2, and the third is +* dense. +* +* IDXQ (input/output) INTEGER array, dimension (N) +* This contains the permutation which separately sorts the two +* sub-problems in D into ascending order. Note that entries in +* the first half of this permutation must first be moved one +* position backward; and entries in the second half +* must first have NL+1 added to their values. +* +* COLTYP (workspace/output) INTEGER array, dimension (N) +* As workspace, this will contain a label which will indicate +* which of the following types a column in the U2 matrix or a +* row in the VT2 matrix is: +* 1 : non-zero in the upper half only +* 2 : non-zero in the lower half only +* 3 : dense +* 4 : deflated +* +* On exit, it is an array of dimension 4, with COLTYP(I) being +* the dimension of the I-th type columns. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. 
+* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, EIGHT + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ EIGHT = 8.0E+0 ) +* .. +* .. Local Arrays .. + INTEGER CTOT( 4 ), PSM( 4 ) +* .. +* .. Local Scalars .. + INTEGER CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, + $ N, NLP1, NLP2 + REAL C, EPS, HLFTOL, S, TAU, TOL, Z1 +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SLAMRG, SLASET, SROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN + INFO = -3 + END IF +* + N = NL + NR + 1 + M = N + SQRE +* + IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDVT.LT.M ) THEN + INFO = -12 + ELSE IF( LDU2.LT.N ) THEN + INFO = -15 + ELSE IF( LDVT2.LT.M ) THEN + INFO = -17 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD2', -INFO ) + RETURN + END IF +* + NLP1 = NL + 1 + NLP2 = NL + 2 +* +* Generate the first part of the vector Z; and move the singular +* values in the first part of D one position backward. +* + Z1 = ALPHA*VT( NLP1, NLP1 ) + Z( 1 ) = Z1 + DO 10 I = NL, 1, -1 + Z( I+1 ) = ALPHA*VT( I, NLP1 ) + D( I+1 ) = D( I ) + IDXQ( I+1 ) = IDXQ( I ) + 1 + 10 CONTINUE +* +* Generate the second part of the vector Z. +* + DO 20 I = NLP2, M + Z( I ) = BETA*VT( I, NLP2 ) + 20 CONTINUE +* +* Initialize some reference arrays. 
+* + DO 30 I = 2, NLP1 + COLTYP( I ) = 1 + 30 CONTINUE + DO 40 I = NLP2, N + COLTYP( I ) = 2 + 40 CONTINUE +* +* Sort the singular values into increasing order +* + DO 50 I = NLP2, N + IDXQ( I ) = IDXQ( I ) + NLP1 + 50 CONTINUE +* +* DSIGMA, IDXC, IDXC, and the first column of U2 +* are used as storage space. +* + DO 60 I = 2, N + DSIGMA( I ) = D( IDXQ( I ) ) + U2( I, 1 ) = Z( IDXQ( I ) ) + IDXC( I ) = COLTYP( IDXQ( I ) ) + 60 CONTINUE +* + CALL SLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) ) +* + DO 70 I = 2, N + IDXI = 1 + IDX( I ) + D( I ) = DSIGMA( IDXI ) + Z( I ) = U2( IDXI, 1 ) + COLTYP( I ) = IDXC( IDXI ) + 70 CONTINUE +* +* Calculate the allowable deflation tolerance +* + EPS = SLAMCH( 'Epsilon' ) + TOL = MAX( ABS( ALPHA ), ABS( BETA ) ) + TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL ) +* +* There are 2 kinds of deflation -- first a value in the z-vector +* is small, second two (or more) singular values are very close +* together (their difference is small). +* +* If the value in the z-vector is small, we simply permute the +* array so that the corresponding singular value is moved to the +* end. +* +* If two values in the D-vector are close, we perform a two-sided +* rotation designed to make one of the corresponding z-vector +* entries zero, and then permute the array so that the deflated +* singular value is moved to the end. +* +* If there are multiple singular values then the problem deflates. +* Here the number of equal singular values are found. As each equal +* singular value is found, an elementary reflector is computed to +* rotate the corresponding singular subspace so that the +* corresponding components of Z are zero in this new basis. +* + K = 1 + K2 = N + 1 + DO 80 J = 2, N + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. 
+* + K2 = K2 - 1 + IDXP( K2 ) = J + COLTYP( J ) = 4 + IF( J.EQ.N ) + $ GO TO 120 + ELSE + JPREV = J + GO TO 90 + END IF + 80 CONTINUE + 90 CONTINUE + J = JPREV + 100 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 110 + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + COLTYP( J ) = 4 + ELSE +* +* Check if singular values are close enough to allow deflation. +* + IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN +* +* Deflation is possible. +* + S = Z( JPREV ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = SLAPY2( C, S ) + C = C / TAU + S = -S / TAU + Z( J ) = TAU + Z( JPREV ) = ZERO +* +* Apply back the Givens rotation to the left and right +* singular vector matrices. +* + IDXJP = IDXQ( IDX( JPREV )+1 ) + IDXJ = IDXQ( IDX( J )+1 ) + IF( IDXJP.LE.NLP1 ) THEN + IDXJP = IDXJP - 1 + END IF + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + CALL SROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S ) + CALL SROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C, + $ S ) + IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN + COLTYP( J ) = 3 + END IF + COLTYP( JPREV ) = 4 + K2 = K2 - 1 + IDXP( K2 ) = JPREV + JPREV = J + ELSE + K = K + 1 + U2( K, 1 ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV + JPREV = J + END IF + END IF + GO TO 100 + 110 CONTINUE +* +* Record the last singular value. +* + K = K + 1 + U2( K, 1 ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV +* + 120 CONTINUE +* +* Count up the total number of the various types of columns, then +* form a permutation which positions the four column types into +* four groups of uniform structure (although one or more of these +* groups may be empty). 
+* + DO 130 J = 1, 4 + CTOT( J ) = 0 + 130 CONTINUE + DO 140 J = 2, N + CT = COLTYP( J ) + CTOT( CT ) = CTOT( CT ) + 1 + 140 CONTINUE +* +* PSM(*) = Position in SubMatrix (of types 1 through 4) +* + PSM( 1 ) = 2 + PSM( 2 ) = 2 + CTOT( 1 ) + PSM( 3 ) = PSM( 2 ) + CTOT( 2 ) + PSM( 4 ) = PSM( 3 ) + CTOT( 3 ) +* +* Fill out the IDXC array so that the permutation which it induces +* will place all type-1 columns first, all type-2 columns next, +* then all type-3's, and finally all type-4's, starting from the +* second column. This applies similarly to the rows of VT. +* + DO 150 J = 2, N + JP = IDXP( J ) + CT = COLTYP( JP ) + IDXC( PSM( CT ) ) = J + PSM( CT ) = PSM( CT ) + 1 + 150 CONTINUE +* +* Sort the singular values and corresponding singular vectors into +* DSIGMA, U2, and VT2 respectively. The singular values/vectors +* which were not deflated go into the first K slots of DSIGMA, U2, +* and VT2 respectively, while those which were deflated go into the +* last N - K slots, except that the first column/row will be treated +* separately. +* + DO 160 J = 2, N + JP = IDXP( J ) + DSIGMA( J ) = D( JP ) + IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 ) + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + CALL SCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 ) + CALL SCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 ) + 160 CONTINUE +* +* Determine DSIGMA(1), DSIGMA(2) and Z(1) +* + DSIGMA( 1 ) = ZERO + HLFTOL = TOL / TWO + IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL ) + $ DSIGMA( 2 ) = HLFTOL + IF( M.GT.N ) THEN + Z( 1 ) = SLAPY2( Z1, Z( M ) ) + IF( Z( 1 ).LE.TOL ) THEN + C = ONE + S = ZERO + Z( 1 ) = TOL + ELSE + C = Z1 / Z( 1 ) + S = Z( M ) / Z( 1 ) + END IF + ELSE + IF( ABS( Z1 ).LE.TOL ) THEN + Z( 1 ) = TOL + ELSE + Z( 1 ) = Z1 + END IF + END IF +* +* Move the rest of the updating row to Z. +* + CALL SCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 ) +* +* Determine the first column of U2, the first row of VT2 and the +* last row of VT. 
+* + CALL SLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 ) + U2( NLP1, 1 ) = ONE + IF( M.GT.N ) THEN + DO 170 I = 1, NLP1 + VT( M, I ) = -S*VT( NLP1, I ) + VT2( 1, I ) = C*VT( NLP1, I ) + 170 CONTINUE + DO 180 I = NLP2, M + VT2( 1, I ) = S*VT( M, I ) + VT( M, I ) = C*VT( M, I ) + 180 CONTINUE + ELSE + CALL SCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 ) + END IF + IF( M.GT.N ) THEN + CALL SCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 ) + END IF +* +* The deflated singular values and their corresponding vectors go +* into the back of D, U, and V respectively. +* + IF( N.GT.K ) THEN + CALL SCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 ) + CALL SLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ), + $ LDU ) + CALL SLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ), + $ LDVT ) + END IF +* +* Copy CTOT into COLTYP for referencing in SLASD3. +* + DO 190 J = 1, 4 + COLTYP( J ) = CTOT( J ) + 190 CONTINUE +* + RETURN +* +* End of SLASD2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasd3.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasd3.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,358 @@ + SUBROUTINE SLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2, + $ LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR, + $ SQRE +* .. +* .. Array Arguments .. + INTEGER CTOT( * ), IDXC( * ) + REAL D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ), + $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), + $ Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASD3 finds all the square roots of the roots of the secular +* equation, as defined by the values in D and Z. It makes the +* appropriate calls to SLASD4 and then updates the singular +* vectors by matrix multiplication. 
+* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* SLASD3 is called from SLASD1. +* +* Arguments +* ========= +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has N = NL + NR + 1 rows and +* M = N + SQRE >= N columns. +* +* K (input) INTEGER +* The size of the secular equation, 1 <= K <= N. +* +* D (output) REAL array, dimension(K) +* On exit the square roots of the roots of the secular equation, +* in ascending order. +* +* Q (workspace) REAL array, +* dimension at least (LDQ,K). +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= K. +* +* DSIGMA (input/output) REAL array, dimension(K) +* The first K elements of this array contain the old roots +* of the deflated updating problem. These are the poles +* of the secular equation. +* +* U (output) REAL array, dimension (LDU, N) +* The last N - K columns of this matrix contain the deflated +* left singular vectors. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= N. +* +* U2 (input) REAL array, dimension (LDU2, N) +* The first K columns of this matrix contain the non-deflated +* left singular vectors for the split problem. +* +* LDU2 (input) INTEGER +* The leading dimension of the array U2. LDU2 >= N. +* +* VT (output) REAL array, dimension (LDVT, M) +* The last M - K columns of VT' contain the deflated +* right singular vectors. 
+* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= N. +* +* VT2 (input/output) REAL array, dimension (LDVT2, N) +* The first K columns of VT2' contain the non-deflated +* right singular vectors for the split problem. +* +* LDVT2 (input) INTEGER +* The leading dimension of the array VT2. LDVT2 >= N. +* +* IDXC (input) INTEGER array, dimension (N) +* The permutation used to arrange the columns of U (and rows of +* VT) into three groups: the first group contains non-zero +* entries only at and above (or before) NL +1; the second +* contains non-zero entries only at and below (or after) NL+2; +* and the third is dense. The first column of U and the row of +* VT are treated separately, however. +* +* The rows of the singular vectors found by SLASD4 +* must be likewise permuted before the matrix multiplies can +* take place. +* +* CTOT (input) INTEGER array, dimension (4) +* A count of the total number of the various types of columns +* in U (or rows in VT), as described in IDXC. The fourth column +* type is any column which has been deflated. +* +* Z (input/output) REAL array, dimension (K) +* The first K elements of this array contain the components +* of the deflation-adjusted updating row vector. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0, + $ NEGONE = -1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER CTEMP, I, J, JC, KTEMP, M, N, NLP1, NLP2, NRP1 + REAL RHO, TEMP +* .. +* .. External Functions .. + REAL SLAMC3, SNRM2 + EXTERNAL SLAMC3, SNRM2 +* .. +* .. External Subroutines .. 
+ EXTERNAL SCOPY, SGEMM, SLACPY, SLASCL, SLASD4, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN + INFO = -3 + END IF +* + N = NL + NR + 1 + M = N + SQRE + NLP1 = NL + 1 + NLP2 = NL + 2 +* + IF( ( K.LT.1 ) .OR. ( K.GT.N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.K ) THEN + INFO = -7 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDU2.LT.N ) THEN + INFO = -12 + ELSE IF( LDVT.LT.M ) THEN + INFO = -14 + ELSE IF( LDVT2.LT.M ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.1 ) THEN + D( 1 ) = ABS( Z( 1 ) ) + CALL SCOPY( M, VT2( 1, 1 ), LDVT2, VT( 1, 1 ), LDVT ) + IF( Z( 1 ).GT.ZERO ) THEN + CALL SCOPY( N, U2( 1, 1 ), 1, U( 1, 1 ), 1 ) + ELSE + DO 10 I = 1, N + U( I, 1 ) = -U2( I, 1 ) + 10 CONTINUE + END IF + RETURN + END IF +* +* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), +* which on any of these machines zeros out the bottommost +* bit of DSIGMA(I) if it is 1; this makes the subsequent +* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DSIGMA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DSIGMA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). 
We use a subroutine call to compute +* 2*DSIGMA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 20 I = 1, K + DSIGMA( I ) = SLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I ) + 20 CONTINUE +* +* Keep a copy of Z. +* + CALL SCOPY( K, Z, 1, Q, 1 ) +* +* Normalize Z. +* + RHO = SNRM2( K, Z, 1 ) + CALL SLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO ) + RHO = RHO*RHO +* +* Find the new singular values. +* + DO 30 J = 1, K + CALL SLASD4( K, J, DSIGMA, Z, U( 1, J ), RHO, D( J ), + $ VT( 1, J ), INFO ) +* +* If the zero finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + 30 CONTINUE +* +* Compute updated Z. +* + DO 60 I = 1, K + Z( I ) = U( I, K )*VT( I, K ) + DO 40 J = 1, I - 1 + Z( I ) = Z( I )*( U( I, J )*VT( I, J ) / + $ ( DSIGMA( I )-DSIGMA( J ) ) / + $ ( DSIGMA( I )+DSIGMA( J ) ) ) + 40 CONTINUE + DO 50 J = I, K - 1 + Z( I ) = Z( I )*( U( I, J )*VT( I, J ) / + $ ( DSIGMA( I )-DSIGMA( J+1 ) ) / + $ ( DSIGMA( I )+DSIGMA( J+1 ) ) ) + 50 CONTINUE + Z( I ) = SIGN( SQRT( ABS( Z( I ) ) ), Q( I, 1 ) ) + 60 CONTINUE +* +* Compute left singular vectors of the modified diagonal matrix, +* and store related information for the right singular vectors. +* + DO 90 I = 1, K + VT( 1, I ) = Z( 1 ) / U( 1, I ) / VT( 1, I ) + U( 1, I ) = NEGONE + DO 70 J = 2, K + VT( J, I ) = Z( J ) / U( J, I ) / VT( J, I ) + U( J, I ) = DSIGMA( J )*VT( J, I ) + 70 CONTINUE + TEMP = SNRM2( K, U( 1, I ), 1 ) + Q( 1, I ) = U( 1, I ) / TEMP + DO 80 J = 2, K + JC = IDXC( J ) + Q( J, I ) = U( JC, I ) / TEMP + 80 CONTINUE + 90 CONTINUE +* +* Update the left singular vector matrix. 
+* + IF( K.EQ.2 ) THEN + CALL SGEMM( 'N', 'N', N, K, K, ONE, U2, LDU2, Q, LDQ, ZERO, U, + $ LDU ) + GO TO 100 + END IF + IF( CTOT( 1 ).GT.0 ) THEN + CALL SGEMM( 'N', 'N', NL, K, CTOT( 1 ), ONE, U2( 1, 2 ), LDU2, + $ Q( 2, 1 ), LDQ, ZERO, U( 1, 1 ), LDU ) + IF( CTOT( 3 ).GT.0 ) THEN + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + CALL SGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ), + $ LDU2, Q( KTEMP, 1 ), LDQ, ONE, U( 1, 1 ), LDU ) + END IF + ELSE IF( CTOT( 3 ).GT.0 ) THEN + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + CALL SGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ), + $ LDU2, Q( KTEMP, 1 ), LDQ, ZERO, U( 1, 1 ), LDU ) + ELSE + CALL SLACPY( 'F', NL, K, U2, LDU2, U, LDU ) + END IF + CALL SCOPY( K, Q( 1, 1 ), LDQ, U( NLP1, 1 ), LDU ) + KTEMP = 2 + CTOT( 1 ) + CTEMP = CTOT( 2 ) + CTOT( 3 ) + CALL SGEMM( 'N', 'N', NR, K, CTEMP, ONE, U2( NLP2, KTEMP ), LDU2, + $ Q( KTEMP, 1 ), LDQ, ZERO, U( NLP2, 1 ), LDU ) +* +* Generate the right singular vectors. +* + 100 CONTINUE + DO 120 I = 1, K + TEMP = SNRM2( K, VT( 1, I ), 1 ) + Q( I, 1 ) = VT( 1, I ) / TEMP + DO 110 J = 2, K + JC = IDXC( J ) + Q( I, J ) = VT( JC, I ) / TEMP + 110 CONTINUE + 120 CONTINUE +* +* Update the right singular vector matrix. 
+* + IF( K.EQ.2 ) THEN + CALL SGEMM( 'N', 'N', K, M, K, ONE, Q, LDQ, VT2, LDVT2, ZERO, + $ VT, LDVT ) + RETURN + END IF + KTEMP = 1 + CTOT( 1 ) + CALL SGEMM( 'N', 'N', K, NLP1, KTEMP, ONE, Q( 1, 1 ), LDQ, + $ VT2( 1, 1 ), LDVT2, ZERO, VT( 1, 1 ), LDVT ) + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + IF( KTEMP.LE.LDVT2 ) + $ CALL SGEMM( 'N', 'N', K, NLP1, CTOT( 3 ), ONE, Q( 1, KTEMP ), + $ LDQ, VT2( KTEMP, 1 ), LDVT2, ONE, VT( 1, 1 ), + $ LDVT ) +* + KTEMP = CTOT( 1 ) + 1 + NRP1 = NR + SQRE + IF( KTEMP.GT.1 ) THEN + DO 130 I = 1, K + Q( I, KTEMP ) = Q( I, 1 ) + 130 CONTINUE + DO 140 I = NLP2, M + VT2( KTEMP, I ) = VT2( 1, I ) + 140 CONTINUE + END IF + CTEMP = 1 + CTOT( 2 ) + CTOT( 3 ) + CALL SGEMM( 'N', 'N', K, NRP1, CTEMP, ONE, Q( 1, KTEMP ), LDQ, + $ VT2( KTEMP, NLP2 ), LDVT2, ZERO, VT( 1, NLP2 ), LDVT ) +* + RETURN +* +* End of SLASD3 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasd4.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasd4.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,890 @@ + SUBROUTINE SLASD4( N, I, D, Z, DELTA, RHO, SIGMA, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER I, INFO, N + REAL RHO, SIGMA +* .. +* .. Array Arguments .. + REAL D( * ), DELTA( * ), WORK( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* This subroutine computes the square root of the I-th updated +* eigenvalue of a positive symmetric rank-one modification to +* a positive diagonal matrix whose entries are given as the squares +* of the corresponding entries in the array d, and that +* +* 0 <= D(i) < D(j) for i < j +* +* and that RHO > 0. This is arranged by the calling routine, and is +* no loss in generality. The rank-one modified system is thus +* +* diag( D ) * diag( D ) + RHO * Z * Z_transpose. +* +* where we assume the Euclidean norm of Z is 1. 
+* +* The method consists of approximating the rational functions in the +* secular equation by simpler interpolating rational functions. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The length of all arrays. +* +* I (input) INTEGER +* The index of the eigenvalue to be computed. 1 <= I <= N. +* +* D (input) REAL array, dimension ( N ) +* The original eigenvalues. It is assumed that they are in +* order, 0 <= D(I) < D(J) for I < J. +* +* Z (input) REAL array, dimension (N) +* The components of the updating vector. +* +* DELTA (output) REAL array, dimension (N) +* If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th +* component. If N = 1, then DELTA(1) = 1. The vector DELTA +* contains the information necessary to construct the +* (singular) eigenvectors. +* +* RHO (input) REAL +* The scalar in the symmetric updating formula. +* +* SIGMA (output) REAL +* The computed sigma_I, the I-th updated eigenvalue. +* +* WORK (workspace) REAL array, dimension (N) +* If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th +* component. If N = 1, then WORK( 1 ) = 1. +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = 1, the updating process failed. +* +* Internal Parameters +* =================== +* +* Logical variable ORGATI (origin-at-i?) is used for distinguishing +* whether D(i) or D(i+1) is treated as the origin. +* +* ORGATI = .true. origin at i +* ORGATI = .false. origin at i+1 +* +* Logical variable SWTCH3 (switch-for-3-poles?) is for noting +* if we are working with THREE poles! +* +* MAXIT is the maximum number of iterations allowed for each +* eigenvalue. +* +* Further Details +* =============== +* +* Based on contributions by +* Ren-Cang Li, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. 
+ INTEGER MAXIT + PARAMETER ( MAXIT = 20 ) + REAL ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ THREE = 3.0E+0, FOUR = 4.0E+0, EIGHT = 8.0E+0, + $ TEN = 10.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ORGATI, SWTCH, SWTCH3 + INTEGER II, IIM1, IIP1, IP1, ITER, J, NITER + REAL A, B, C, DELSQ, DELSQ2, DPHI, DPSI, DTIIM, + $ DTIIP, DTIPSQ, DTISQ, DTNSQ, DTNSQ1, DW, EPS, + $ ERRETM, ETA, PHI, PREW, PSI, RHOINV, SG2LB, + $ SG2UB, TAU, TEMP, TEMP1, TEMP2, W +* .. +* .. Local Arrays .. + REAL DD( 3 ), ZZ( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SLAED6, SLASD5 +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Since this routine is called in an inner loop, we do no argument +* checking. +* +* Quick return for N=1 and 2. +* + INFO = 0 + IF( N.EQ.1 ) THEN +* +* Presumably, I=1 upon entry +* + SIGMA = SQRT( D( 1 )*D( 1 )+RHO*Z( 1 )*Z( 1 ) ) + DELTA( 1 ) = ONE + WORK( 1 ) = ONE + RETURN + END IF + IF( N.EQ.2 ) THEN + CALL SLASD5( I, D, Z, DELTA, RHO, SIGMA, WORK ) + RETURN + END IF +* +* Compute machine epsilon +* + EPS = SLAMCH( 'Epsilon' ) + RHOINV = ONE / RHO +* +* The case I = N +* + IF( I.EQ.N ) THEN +* +* Initialize some basic variables +* + II = N - 1 + NITER = 1 +* +* Calculate initial guess +* + TEMP = RHO / TWO +* +* If ||Z||_2 is not one, then TEMP should be set to +* RHO * ||Z||_2^2 / TWO +* + TEMP1 = TEMP / ( D( N )+SQRT( D( N )*D( N )+TEMP ) ) + DO 10 J = 1, N + WORK( J ) = D( J ) + D( N ) + TEMP1 + DELTA( J ) = ( D( J )-D( N ) ) - TEMP1 + 10 CONTINUE +* + PSI = ZERO + DO 20 J = 1, N - 2 + PSI = PSI + Z( J )*Z( J ) / ( DELTA( J )*WORK( J ) ) + 20 CONTINUE +* + C = RHOINV + PSI + W = C + Z( II )*Z( II ) / ( DELTA( II )*WORK( II ) ) + + $ Z( N )*Z( N ) / ( DELTA( N )*WORK( N ) ) +* + IF( W.LE.ZERO ) THEN + TEMP1 = SQRT( D( N )*D( N )+RHO ) + TEMP = Z( N-1 )*Z( N-1 ) / ( ( D( 
N-1 )+TEMP1 )* + $ ( D( N )-D( N-1 )+RHO / ( D( N )+TEMP1 ) ) ) + + $ Z( N )*Z( N ) / RHO +* +* The following TAU is to approximate +* SIGMA_n^2 - D( N )*D( N ) +* + IF( C.LE.TEMP ) THEN + TAU = RHO + ELSE + DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) ) + A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DELSQ + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF + END IF +* +* It can be proved that +* D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO +* + ELSE + DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) ) + A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DELSQ +* +* The following TAU is to approximate +* SIGMA_n^2 - D( N )*D( N ) +* + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF +* +* It can be proved that +* D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 +* + END IF +* +* The following ETA is to approximate SIGMA_n - D( N ) +* + ETA = TAU / ( D( N )+SQRT( D( N )*D( N )+TAU ) ) +* + SIGMA = D( N ) + ETA + DO 30 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - ETA + WORK( J ) = D( J ) + D( I ) + ETA + 30 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 40 J = 1, II + TEMP = Z( J ) / ( DELTA( J )*WORK( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 40 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / ( DELTA( N )*WORK( N ) ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + DTNSQ1 = WORK( N-1 )*DELTA( N-1 ) + DTNSQ = WORK( N )*DELTA( N ) + C = W - DTNSQ1*DPSI - DTNSQ*DPHI + A = ( DTNSQ+DTNSQ1 
)*W - DTNSQ*DTNSQ1*( DPSI+DPHI ) + B = DTNSQ*DTNSQ1*W + IF( C.LT.ZERO ) + $ C = ABS( C ) + IF( C.EQ.ZERO ) THEN + ETA = RHO - SIGMA*SIGMA + ELSE IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = ETA - DTNSQ + IF( TEMP.GT.RHO ) + $ ETA = RHO + DTNSQ +* + TAU = TAU + ETA + ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) ) + DO 50 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + WORK( J ) = WORK( J ) + ETA + 50 CONTINUE +* + SIGMA = SIGMA + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 60 J = 1, II + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 60 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / ( WORK( N )*DELTA( N ) ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 90 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* +* Calculate the new step +* + DTNSQ1 = WORK( N-1 )*DELTA( N-1 ) + DTNSQ = WORK( N )*DELTA( N ) + C = W - DTNSQ1*DPSI - DTNSQ*DPHI + A = ( DTNSQ+DTNSQ1 )*W - DTNSQ1*DTNSQ*( DPSI+DPHI ) + B = DTNSQ1*DTNSQ*W + IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. 
However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = ETA - DTNSQ + IF( TEMP.LE.ZERO ) + $ ETA = ETA / TWO +* + TAU = TAU + ETA + ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) ) + DO 70 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + WORK( J ) = WORK( J ) + ETA + 70 CONTINUE +* + SIGMA = SIGMA + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 80 J = 1, II + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 80 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / ( WORK( N )*DELTA( N ) ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI + 90 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + GO TO 240 +* +* End for the case I = N +* + ELSE +* +* The case for I < N +* + NITER = 1 + IP1 = I + 1 +* +* Calculate initial guess +* + DELSQ = ( D( IP1 )-D( I ) )*( D( IP1 )+D( I ) ) + DELSQ2 = DELSQ / TWO + TEMP = DELSQ2 / ( D( I )+SQRT( D( I )*D( I )+DELSQ2 ) ) + DO 100 J = 1, N + WORK( J ) = D( J ) + D( I ) + TEMP + DELTA( J ) = ( D( J )-D( I ) ) - TEMP + 100 CONTINUE +* + PSI = ZERO + DO 110 J = 1, I - 1 + PSI = PSI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) ) + 110 CONTINUE +* + PHI = ZERO + DO 120 J = N, I + 2, -1 + PHI = PHI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) ) + 120 CONTINUE + C = RHOINV + PSI + PHI + W = C + Z( I )*Z( I ) / ( WORK( I )*DELTA( I ) ) + + $ Z( IP1 )*Z( IP1 ) / ( WORK( IP1 )*DELTA( IP1 ) ) +* + IF( W.GT.ZERO ) THEN +* +* d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 +* +* We choose d(i) as origin. +* + ORGATI = .TRUE. 
+ SG2LB = ZERO + SG2UB = DELSQ2 + A = C*DELSQ + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 ) + B = Z( I )*Z( I )*DELSQ + IF( A.GT.ZERO ) THEN + TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + ELSE + TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + END IF +* +* TAU now is an estimation of SIGMA^2 - D( I )^2. The +* following, however, is the corresponding estimation of +* SIGMA - D( I ). +* + ETA = TAU / ( D( I )+SQRT( D( I )*D( I )+TAU ) ) + ELSE +* +* (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 +* +* We choose d(i+1) as origin. +* + ORGATI = .FALSE. + SG2LB = -DELSQ2 + SG2UB = ZERO + A = C*DELSQ - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 ) + B = Z( IP1 )*Z( IP1 )*DELSQ + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) ) + ELSE + TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C ) + END IF +* +* TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The +* following, however, is the corresponding estimation of +* SIGMA - D( IP1 ). +* + ETA = TAU / ( D( IP1 )+SQRT( ABS( D( IP1 )*D( IP1 )+ + $ TAU ) ) ) + END IF +* + IF( ORGATI ) THEN + II = I + SIGMA = D( I ) + ETA + DO 130 J = 1, N + WORK( J ) = D( J ) + D( I ) + ETA + DELTA( J ) = ( D( J )-D( I ) ) - ETA + 130 CONTINUE + ELSE + II = I + 1 + SIGMA = D( IP1 ) + ETA + DO 140 J = 1, N + WORK( J ) = D( J ) + D( IP1 ) + ETA + DELTA( J ) = ( D( J )-D( IP1 ) ) - ETA + 140 CONTINUE + END IF + IIM1 = II - 1 + IIP1 = II + 1 +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 150 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 150 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 160 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 160 CONTINUE +* + W = RHOINV + PHI + PSI +* +* W is the value of the secular function with +* its ii-th element 
removed. +* + SWTCH3 = .FALSE. + IF( ORGATI ) THEN + IF( W.LT.ZERO ) + $ SWTCH3 = .TRUE. + ELSE + IF( W.GT.ZERO ) + $ SWTCH3 = .TRUE. + END IF + IF( II.EQ.1 .OR. II.EQ.N ) + $ SWTCH3 = .FALSE. +* + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = W + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* + IF( W.LE.ZERO ) THEN + SG2LB = MAX( SG2LB, TAU ) + ELSE + SG2UB = MIN( SG2UB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + IF( .NOT.SWTCH3 ) THEN + DTIPSQ = WORK( IP1 )*DELTA( IP1 ) + DTISQ = WORK( I )*DELTA( I ) + IF( ORGATI ) THEN + C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2 + ELSE + C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2 + END IF + A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW + B = DTIPSQ*DTISQ*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + DTISQ*DTISQ*( DPSI+DPHI ) + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + DTIIM = WORK( IIM1 )*DELTA( IIM1 ) + DTIIP = WORK( IIP1 )*DELTA( IIP1 ) + TEMP = RHOINV + PSI + PHI + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DTIIM + TEMP1 = TEMP1*TEMP1 + C = ( TEMP - DTIIP*( DPSI+DPHI ) ) - + $ ( D( IIM1 )-D( IIP1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + IF( DPSI.LT.TEMP1 ) THEN + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI ) + END IF + ELSE + TEMP1 = Z( IIP1 ) / DTIIP + TEMP1 = TEMP1*TEMP1 + C = ( TEMP - DTIIM*( DPSI+DPHI ) ) - + $ ( D( IIP1 )-D( IIM1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1 + IF( DPHI.LT.TEMP1 ) THEN + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ELSE + ZZ( 1 ) = 
DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) ) + END IF + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + ZZ( 2 ) = Z( II )*Z( II ) + DD( 1 ) = DTIIM + DD( 2 ) = DELTA( II )*WORK( II ) + DD( 3 ) = DTIIP + CALL SLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 240 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + IF( ORGATI ) THEN + TEMP1 = WORK( I )*DELTA( I ) + TEMP = ETA - TEMP1 + ELSE + TEMP1 = WORK( IP1 )*DELTA( IP1 ) + TEMP = ETA - TEMP1 + END IF + IF( TEMP.GT.SG2UB .OR. TEMP.LT.SG2LB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( SG2UB-TAU ) / TWO + ELSE + ETA = ( SG2LB-TAU ) / TWO + END IF + END IF +* + TAU = TAU + ETA + ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) ) +* + PREW = W +* + SIGMA = SIGMA + ETA + DO 170 J = 1, N + WORK( J ) = WORK( J ) + ETA + DELTA( J ) = DELTA( J ) - ETA + 170 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 180 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 180 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 190 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 190 CONTINUE +* + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW +* + IF( W.LE.ZERO ) THEN + SG2LB = MAX( SG2LB, TAU ) + ELSE + SG2UB = MIN( SG2UB, TAU ) + END IF +* + SWTCH = .FALSE. + IF( ORGATI ) THEN + IF( -W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. 
+ ELSE + IF( W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + END IF +* +* Main loop to update the values of the array DELTA and WORK +* + ITER = NITER + 1 +* + DO 230 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* +* Calculate the new step +* + IF( .NOT.SWTCH3 ) THEN + DTIPSQ = WORK( IP1 )*DELTA( IP1 ) + DTISQ = WORK( I )*DELTA( I ) + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2 + ELSE + C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2 + END IF + ELSE + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + IF( ORGATI ) THEN + DPSI = DPSI + TEMP*TEMP + ELSE + DPHI = DPHI + TEMP*TEMP + END IF + C = W - DTISQ*DPSI - DTIPSQ*DPHI + END IF + A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW + B = DTIPSQ*DTISQ*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ* + $ ( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + + $ DTISQ*DTISQ*( DPSI+DPHI ) + END IF + ELSE + A = DTISQ*DTISQ*DPSI + DTIPSQ*DTIPSQ*DPHI + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + DTIIM = WORK( IIM1 )*DELTA( IIM1 ) + DTIIP = WORK( IIP1 )*DELTA( IIP1 ) + TEMP = RHOINV + PSI + PHI + IF( SWTCH ) THEN + C = TEMP - DTIIM*DPSI - DTIIP*DPHI + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DTIIM + TEMP1 = TEMP1*TEMP1 + TEMP2 = ( D( IIM1 )-D( IIP1 ) )* + $ ( D( IIM1 )+D( IIP1 ) )*TEMP1 + C = TEMP - DTIIP*( DPSI+DPHI ) - TEMP2 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + IF( DPSI.LT.TEMP1 ) THEN + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI ) + END IF + ELSE + TEMP1 = Z( IIP1 ) / DTIIP + TEMP1 = TEMP1*TEMP1 + TEMP2 = ( D( IIP1 )-D( IIM1 ) )* + $ ( D( IIM1 )+D( IIP1 ) )*TEMP1 + 
C = TEMP - DTIIM*( DPSI+DPHI ) - TEMP2 + IF( DPHI.LT.TEMP1 ) THEN + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ELSE + ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) ) + END IF + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + END IF + DD( 1 ) = DTIIM + DD( 2 ) = DELTA( II )*WORK( II ) + DD( 3 ) = DTIIP + CALL SLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 240 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + IF( ORGATI ) THEN + TEMP1 = WORK( I )*DELTA( I ) + TEMP = ETA - TEMP1 + ELSE + TEMP1 = WORK( IP1 )*DELTA( IP1 ) + TEMP = ETA - TEMP1 + END IF + IF( TEMP.GT.SG2UB .OR. TEMP.LT.SG2LB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( SG2UB-TAU ) / TWO + ELSE + ETA = ( SG2LB-TAU ) / TWO + END IF + END IF +* + TAU = TAU + ETA + ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) ) +* + SIGMA = SIGMA + ETA + DO 200 J = 1, N + WORK( J ) = WORK( J ) + ETA + DELTA( J ) = DELTA( J ) - ETA + 200 CONTINUE +* + PREW = W +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 210 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 210 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 220 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 220 CONTINUE +* + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW + IF( W*PREW.GT.ZERO .AND. 
ABS( W ).GT.ABS( PREW ) / TEN ) + $ SWTCH = .NOT.SWTCH +* + IF( W.LE.ZERO ) THEN + SG2LB = MAX( SG2LB, TAU ) + ELSE + SG2UB = MIN( SG2UB, TAU ) + END IF +* + 230 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 +* + END IF +* + 240 CONTINUE + RETURN +* +* End of SLASD4 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasd5.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasd5.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,163 @@ + SUBROUTINE SLASD5( I, D, Z, DELTA, RHO, DSIGMA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER I + REAL DSIGMA, RHO +* .. +* .. Array Arguments .. + REAL D( 2 ), DELTA( 2 ), WORK( 2 ), Z( 2 ) +* .. +* +* Purpose +* ======= +* +* This subroutine computes the square root of the I-th eigenvalue +* of a positive symmetric rank-one modification of a 2-by-2 diagonal +* matrix +* +* diag( D ) * diag( D ) + RHO * Z * transpose(Z) . +* +* The diagonal entries in the array D are assumed to satisfy +* +* 0 <= D(i) < D(j) for i < j . +* +* We also assume RHO > 0 and that the Euclidean norm of the vector +* Z is one. +* +* Arguments +* ========= +* +* I (input) INTEGER +* The index of the eigenvalue to be computed. I = 1 or I = 2. +* +* D (input) REAL array, dimension (2) +* The original eigenvalues. We assume 0 <= D(1) < D(2). +* +* Z (input) REAL array, dimension (2) +* The components of the updating vector. +* +* DELTA (output) REAL array, dimension (2) +* Contains (D(j) - sigma_I) in its j-th component. +* The vector DELTA contains the information necessary +* to construct the eigenvectors. +* +* RHO (input) REAL +* The scalar in the symmetric updating formula. +* +* DSIGMA (output) REAL +* The computed sigma_I, the I-th updated eigenvalue. +* +* WORK (workspace) REAL array, dimension (2) +* WORK contains (D(j) + sigma_I) in its j-th component. 
+* +* Further Details +* =============== +* +* Based on contributions by +* Ren-Cang Li, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, THREE, FOUR + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ THREE = 3.0E+0, FOUR = 4.0E+0 ) +* .. +* .. Local Scalars .. + REAL B, C, DEL, DELSQ, TAU, W +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + DEL = D( 2 ) - D( 1 ) + DELSQ = DEL*( D( 2 )+D( 1 ) ) + IF( I.EQ.1 ) THEN + W = ONE + FOUR*RHO*( Z( 2 )*Z( 2 ) / ( D( 1 )+THREE*D( 2 ) )- + $ Z( 1 )*Z( 1 ) / ( THREE*D( 1 )+D( 2 ) ) ) / DEL + IF( W.GT.ZERO ) THEN + B = DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 1 )*Z( 1 )*DELSQ +* +* B > ZERO, always +* +* The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) +* + TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) ) +* +* The following TAU is DSIGMA - D( 1 ) +* + TAU = TAU / ( D( 1 )+SQRT( D( 1 )*D( 1 )+TAU ) ) + DSIGMA = D( 1 ) + TAU + DELTA( 1 ) = -TAU + DELTA( 2 ) = DEL - TAU + WORK( 1 ) = TWO*D( 1 ) + TAU + WORK( 2 ) = ( D( 1 )+TAU ) + D( 2 ) +* DELTA( 1 ) = -Z( 1 ) / TAU +* DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) + ELSE + B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DELSQ +* +* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) +* + IF( B.GT.ZERO ) THEN + TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) ) + ELSE + TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO + END IF +* +* The following TAU is DSIGMA - D( 2 ) +* + TAU = TAU / ( D( 2 )+SQRT( ABS( D( 2 )*D( 2 )+TAU ) ) ) + DSIGMA = D( 2 ) + TAU + DELTA( 1 ) = -( DEL+TAU ) + DELTA( 2 ) = -TAU + WORK( 1 ) = D( 1 ) + TAU + D( 2 ) + WORK( 2 ) = TWO*D( 2 ) + TAU +* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) +* DELTA( 2 ) = -Z( 2 ) / TAU + END IF +* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) +* DELTA( 1 ) = DELTA( 1 ) / TEMP +* DELTA( 2 ) = DELTA( 2 ) / TEMP 
+ ELSE +* +* Now I=2 +* + B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DELSQ +* +* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) +* + IF( B.GT.ZERO ) THEN + TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO + ELSE + TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) ) + END IF +* +* The following TAU is DSIGMA - D( 2 ) +* + TAU = TAU / ( D( 2 )+SQRT( D( 2 )*D( 2 )+TAU ) ) + DSIGMA = D( 2 ) + TAU + DELTA( 1 ) = -( DEL+TAU ) + DELTA( 2 ) = -TAU + WORK( 1 ) = D( 1 ) + TAU + D( 2 ) + WORK( 2 ) = TWO*D( 2 ) + TAU +* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) +* DELTA( 2 ) = -Z( 2 ) / TAU +* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) +* DELTA( 1 ) = DELTA( 1 ) / TEMP +* DELTA( 2 ) = DELTA( 2 ) / TEMP + END IF + RETURN +* +* End of SLASD5 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasd6.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasd6.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,305 @@ + SUBROUTINE SLASD6( ICOMPQ, NL, NR, SQRE, D, VF, VL, ALPHA, BETA, + $ IDXQ, PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, + $ LDGNUM, POLES, DIFL, DIFR, Z, K, C, S, WORK, + $ IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, + $ NR, SQRE + REAL ALPHA, BETA, C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), IDXQ( * ), IWORK( * ), + $ PERM( * ) + REAL D( * ), DIFL( * ), DIFR( * ), + $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), + $ VF( * ), VL( * ), WORK( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASD6 computes the SVD of an updated upper bidiagonal matrix B +* obtained by merging two smaller ones by appending a row. This +* routine is used only for the problem which requires all singular +* values and optionally singular vector matrices in factored form. +* B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. 
+* A related subroutine, SLASD1, handles the case in which all singular +* values and singular vectors of the bidiagonal matrix are desired. +* +* SLASD6 computes the SVD as follows: +* +* ( D1(in) 0 0 0 ) +* B = U(in) * ( Z1' a Z2' b ) * VT(in) +* ( 0 0 D2(in) 0 ) +* +* = U(out) * ( D(out) 0) * VT(out) +* +* where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M +* with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros +* elsewhere; and the entry b is empty if SQRE = 0. +* +* The singular values of B can be computed using D1, D2, the first +* components of all the right singular vectors of the lower block, and +* the last components of all the right singular vectors of the upper +* block. These components are stored and updated in VF and VL, +* respectively, in SLASD6. Hence U and VT are not explicitly +* referenced. +* +* The singular values are stored in D. The algorithm consists of two +* stages: +* +* The first stage consists of deflating the size of the problem +* when there are multiple singular values or if there is a zero +* in the Z vector. For each such occurence the dimension of the +* secular equation problem is reduced by one. This stage is +* performed by the routine SLASD7. +* +* The second stage consists of calculating the updated +* singular values. This is done by finding the roots of the +* secular equation via the routine SLASD4 (as called by SLASD8). +* This routine also updates VF and VL and computes the distances +* between the updated singular values and the old singular +* values. +* +* SLASD6 is called from SLASDA. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed in +* factored form: +* = 0: Compute singular values only. +* = 1: Compute singular vectors in factored form as well. +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. 
+* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has row dimension N = NL + NR + 1, +* and column dimension M = N + SQRE. +* +* D (input/output) REAL array, dimension (NL+NR+1). +* On entry D(1:NL,1:NL) contains the singular values of the +* upper block, and D(NL+2:N) contains the singular values +* of the lower block. On exit D(1:N) contains the singular +* values of the modified matrix. +* +* VF (input/output) REAL array, dimension (M) +* On entry, VF(1:NL+1) contains the first components of all +* right singular vectors of the upper block; and VF(NL+2:M) +* contains the first components of all right singular vectors +* of the lower block. On exit, VF contains the first components +* of all right singular vectors of the bidiagonal matrix. +* +* VL (input/output) REAL array, dimension (M) +* On entry, VL(1:NL+1) contains the last components of all +* right singular vectors of the upper block; and VL(NL+2:M) +* contains the last components of all right singular vectors of +* the lower block. On exit, VL contains the last components of +* all right singular vectors of the bidiagonal matrix. +* +* ALPHA (input/output) REAL +* Contains the diagonal element associated with the added row. +* +* BETA (input/output) REAL +* Contains the off-diagonal element associated with the added +* row. +* +* IDXQ (output) INTEGER array, dimension (N) +* This contains the permutation which will reintegrate the +* subproblem just solved back into sorted order, i.e. +* D( IDXQ( I = 1, N ) ) will be in ascending order. +* +* PERM (output) INTEGER array, dimension ( N ) +* The permutations (from deflation and sorting) to be applied +* to each block. Not referenced if ICOMPQ = 0. +* +* GIVPTR (output) INTEGER +* The number of Givens rotations which took place in this +* subproblem. Not referenced if ICOMPQ = 0. 
+* +* GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. Not referenced if ICOMPQ = 0. +* +* LDGCOL (input) INTEGER +* leading dimension of GIVCOL, must be at least N. +* +* GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) +* Each number indicates the C or S value to be used in the +* corresponding Givens rotation. Not referenced if ICOMPQ = 0. +* +* LDGNUM (input) INTEGER +* The leading dimension of GIVNUM and POLES, must be at least N. +* +* POLES (output) REAL array, dimension ( LDGNUM, 2 ) +* On exit, POLES(1,*) is an array containing the new singular +* values obtained from solving the secular equation, and +* POLES(2,*) is an array containing the poles in the secular +* equation. Not referenced if ICOMPQ = 0. +* +* DIFL (output) REAL array, dimension ( N ) +* On exit, DIFL(I) is the distance between I-th updated +* (undeflated) singular value and the I-th (undeflated) old +* singular value. +* +* DIFR (output) REAL array, +* dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and +* dimension ( N ) if ICOMPQ = 0. +* On exit, DIFR(I, 1) is the distance between I-th updated +* (undeflated) singular value and the I+1-th (undeflated) old +* singular value. +* +* If ICOMPQ = 1, DIFR(1:K,2) is an array containing the +* normalizing factors for the right singular vector matrix. +* +* See SLASD8 for details on DIFL and DIFR. +* +* Z (output) REAL array, dimension ( M ) +* The first elements of this array contain the components +* of the deflation-adjusted updating row vector. +* +* K (output) INTEGER +* Contains the dimension of the non-deflated matrix, +* This is the order of the related secular equation. 1 <= K <=N. +* +* C (output) REAL +* C contains garbage if SQRE =0 and the C-value of a Givens +* rotation related to the right null space if SQRE = 1. 
+* +* S (output) REAL +* S contains garbage if SQRE =0 and the S-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* WORK (workspace) REAL array, dimension ( 4 * M ) +* +* IWORK (workspace) INTEGER array, dimension ( 3 * N ) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IDX, IDXC, IDXP, ISIGMA, IVFW, IVLW, IW, M, + $ N, N1, N2 + REAL ORGNRM +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAMRG, SLASCL, SLASD7, SLASD8, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + N = NL + NR + 1 + M = N + SQRE +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -14 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD6', -INFO ) + RETURN + END IF +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in SLASD7 and SLASD8. +* + ISIGMA = 1 + IW = ISIGMA + N + IVFW = IW + M + IVLW = IVFW + M +* + IDX = 1 + IDXC = IDX + N + IDXP = IDXC + N +* +* Scale. 
+* + ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) ) + D( NL+1 ) = ZERO + DO 10 I = 1, N + IF( ABS( D( I ) ).GT.ORGNRM ) THEN + ORGNRM = ABS( D( I ) ) + END IF + 10 CONTINUE + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + ALPHA = ALPHA / ORGNRM + BETA = BETA / ORGNRM +* +* Sort and Deflate singular values. +* + CALL SLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, WORK( IW ), VF, + $ WORK( IVFW ), VL, WORK( IVLW ), ALPHA, BETA, + $ WORK( ISIGMA ), IWORK( IDX ), IWORK( IDXP ), IDXQ, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, C, S, + $ INFO ) +* +* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. +* + CALL SLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDGNUM, + $ WORK( ISIGMA ), WORK( IW ), INFO ) +* +* Save the poles if ICOMPQ = 1. +* + IF( ICOMPQ.EQ.1 ) THEN + CALL SCOPY( K, D, 1, POLES( 1, 1 ), 1 ) + CALL SCOPY( K, WORK( ISIGMA ), 1, POLES( 1, 2 ), 1 ) + END IF +* +* Unscale. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) +* +* Prepare the IDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL SLAMRG( N1, N2, D, 1, -1, IDXQ ) +* + RETURN +* +* End of SLASD6 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasd7.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasd7.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,444 @@ + SUBROUTINE SLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL, + $ VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ C, S, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, + $ NR, SQRE + REAL ALPHA, BETA, C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ), + $ IDXQ( * ), PERM( * ) + REAL D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ), + $ VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ), + $ ZW( * ) +* .. 
+* +* Purpose +* ======= +* +* SLASD7 merges the two sets of singular values together into a single +* sorted set. Then it tries to deflate the size of the problem. There +* are two ways in which deflation can occur: when two or more singular +* values are close together or if there is a tiny entry in the Z +* vector. For each such occurrence the order of the related +* secular equation problem is reduced by one. +* +* SLASD7 is called from SLASD6. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed +* in compact form, as follows: +* = 0: Compute singular values only. +* = 1: Compute singular vectors of upper +* bidiagonal matrix in compact form. +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has +* N = NL + NR + 1 rows and +* M = N + SQRE >= N columns. +* +* K (output) INTEGER +* Contains the dimension of the non-deflated matrix, this is +* the order of the related secular equation. 1 <= K <=N. +* +* D (input/output) REAL array, dimension ( N ) +* On entry D contains the singular values of the two submatrices +* to be combined. On exit D contains the trailing (N-K) updated +* singular values (those which were deflated) sorted into +* increasing order. +* +* Z (output) REAL array, dimension ( M ) +* On exit Z contains the updating row vector in the secular +* equation. +* +* ZW (workspace) REAL array, dimension ( M ) +* Workspace for Z. +* +* VF (input/output) REAL array, dimension ( M ) +* On entry, VF(1:NL+1) contains the first components of all +* right singular vectors of the upper block; and VF(NL+2:M) +* contains the first components of all right singular vectors +* of the lower block. 
On exit, VF contains the first components +* of all right singular vectors of the bidiagonal matrix. +* +* VFW (workspace) REAL array, dimension ( M ) +* Workspace for VF. +* +* VL (input/output) REAL array, dimension ( M ) +* On entry, VL(1:NL+1) contains the last components of all +* right singular vectors of the upper block; and VL(NL+2:M) +* contains the last components of all right singular vectors +* of the lower block. On exit, VL contains the last components +* of all right singular vectors of the bidiagonal matrix. +* +* VLW (workspace) REAL array, dimension ( M ) +* Workspace for VL. +* +* ALPHA (input) REAL +* Contains the diagonal element associated with the added row. +* +* BETA (input) REAL +* Contains the off-diagonal element associated with the added +* row. +* +* DSIGMA (output) REAL array, dimension ( N ) +* Contains a copy of the diagonal elements (K-1 singular values +* and one zero) in the secular equation. +* +* IDX (workspace) INTEGER array, dimension ( N ) +* This will contain the permutation used to sort the contents of +* D into ascending order. +* +* IDXP (workspace) INTEGER array, dimension ( N ) +* This will contain the permutation used to place deflated +* values of D at the end of the array. On output IDXP(2:K) +* points to the nondeflated D-values and IDXP(K+1:N) +* points to the deflated singular values. +* +* IDXQ (input) INTEGER array, dimension ( N ) +* This contains the permutation which separately sorts the two +* sub-problems in D into ascending order. Note that entries in +* the first half of this permutation must first be moved one +* position backward; and entries in the second half +* must first have NL+1 added to their values. +* +* PERM (output) INTEGER array, dimension ( N ) +* The permutations (from deflation and sorting) to be applied +* to each singular block. Not referenced if ICOMPQ = 0. +* +* GIVPTR (output) INTEGER +* The number of Givens rotations which took place in this +* subproblem. 
Not referenced if ICOMPQ = 0. +* +* GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. Not referenced if ICOMPQ = 0. +* +* LDGCOL (input) INTEGER +* The leading dimension of GIVCOL, must be at least N. +* +* GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) +* Each number indicates the C or S value to be used in the +* corresponding Givens rotation. Not referenced if ICOMPQ = 0. +* +* LDGNUM (input) INTEGER +* The leading dimension of GIVNUM, must be at least N. +* +* C (output) REAL +* C contains garbage if SQRE =0 and the C-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* S (output) REAL +* S contains garbage if SQRE =0 and the S-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, EIGHT + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ EIGHT = 8.0E+0 ) +* .. +* .. Local Scalars .. +* + INTEGER I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, N, + $ NLP1, NLP2 + REAL EPS, HLFTOL, TAU, TOL, Z1 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAMRG, SROT, XERBLA +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + N = NL + NR + 1 + M = N + SQRE +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. 
( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -22 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -24 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD7', -INFO ) + RETURN + END IF +* + NLP1 = NL + 1 + NLP2 = NL + 2 + IF( ICOMPQ.EQ.1 ) THEN + GIVPTR = 0 + END IF +* +* Generate the first part of the vector Z and move the singular +* values in the first part of D one position backward. +* + Z1 = ALPHA*VL( NLP1 ) + VL( NLP1 ) = ZERO + TAU = VF( NLP1 ) + DO 10 I = NL, 1, -1 + Z( I+1 ) = ALPHA*VL( I ) + VL( I ) = ZERO + VF( I+1 ) = VF( I ) + D( I+1 ) = D( I ) + IDXQ( I+1 ) = IDXQ( I ) + 1 + 10 CONTINUE + VF( 1 ) = TAU +* +* Generate the second part of the vector Z. +* + DO 20 I = NLP2, M + Z( I ) = BETA*VF( I ) + VF( I ) = ZERO + 20 CONTINUE +* +* Sort the singular values into increasing order +* + DO 30 I = NLP2, N + IDXQ( I ) = IDXQ( I ) + NLP1 + 30 CONTINUE +* +* DSIGMA, IDXC, IDXC, and ZW are used as storage space. +* + DO 40 I = 2, N + DSIGMA( I ) = D( IDXQ( I ) ) + ZW( I ) = Z( IDXQ( I ) ) + VFW( I ) = VF( IDXQ( I ) ) + VLW( I ) = VL( IDXQ( I ) ) + 40 CONTINUE +* + CALL SLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) ) +* + DO 50 I = 2, N + IDXI = 1 + IDX( I ) + D( I ) = DSIGMA( IDXI ) + Z( I ) = ZW( IDXI ) + VF( I ) = VFW( IDXI ) + VL( I ) = VLW( IDXI ) + 50 CONTINUE +* +* Calculate the allowable deflation tolerence +* + EPS = SLAMCH( 'Epsilon' ) + TOL = MAX( ABS( ALPHA ), ABS( BETA ) ) + TOL = EIGHT*EIGHT*EPS*MAX( ABS( D( N ) ), TOL ) +* +* There are 2 kinds of deflation -- first a value in the z-vector +* is small, second two (or more) singular values are very close +* together (their difference is small). +* +* If the value in the z-vector is small, we simply permute the +* array so that the corresponding singular value is moved to the +* end. 
+* +* If two values in the D-vector are close, we perform a two-sided +* rotation designed to make one of the corresponding z-vector +* entries zero, and then permute the array so that the deflated +* singular value is moved to the end. +* +* If there are multiple singular values then the problem deflates. +* Here the number of equal singular values are found. As each equal +* singular value is found, an elementary reflector is computed to +* rotate the corresponding singular subspace so that the +* corresponding components of Z are zero in this new basis. +* + K = 1 + K2 = N + 1 + DO 60 J = 2, N + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + IF( J.EQ.N ) + $ GO TO 100 + ELSE + JPREV = J + GO TO 70 + END IF + 60 CONTINUE + 70 CONTINUE + J = JPREV + 80 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 90 + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + ELSE +* +* Check if singular values are close enough to allow deflation. +* + IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN +* +* Deflation is possible. +* + S = Z( JPREV ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. 
+* + TAU = SLAPY2( C, S ) + Z( J ) = TAU + Z( JPREV ) = ZERO + C = C / TAU + S = -S / TAU +* +* Record the appropriate Givens rotation +* + IF( ICOMPQ.EQ.1 ) THEN + GIVPTR = GIVPTR + 1 + IDXJP = IDXQ( IDX( JPREV )+1 ) + IDXJ = IDXQ( IDX( J )+1 ) + IF( IDXJP.LE.NLP1 ) THEN + IDXJP = IDXJP - 1 + END IF + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + GIVCOL( GIVPTR, 2 ) = IDXJP + GIVCOL( GIVPTR, 1 ) = IDXJ + GIVNUM( GIVPTR, 2 ) = C + GIVNUM( GIVPTR, 1 ) = S + END IF + CALL SROT( 1, VF( JPREV ), 1, VF( J ), 1, C, S ) + CALL SROT( 1, VL( JPREV ), 1, VL( J ), 1, C, S ) + K2 = K2 - 1 + IDXP( K2 ) = JPREV + JPREV = J + ELSE + K = K + 1 + ZW( K ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV + JPREV = J + END IF + END IF + GO TO 80 + 90 CONTINUE +* +* Record the last singular value. +* + K = K + 1 + ZW( K ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV +* + 100 CONTINUE +* +* Sort the singular values into DSIGMA. The singular values which +* were not deflated go into the first K slots of DSIGMA, except +* that DSIGMA(1) is treated separately. +* + DO 110 J = 2, N + JP = IDXP( J ) + DSIGMA( J ) = D( JP ) + VFW( J ) = VF( JP ) + VLW( J ) = VL( JP ) + 110 CONTINUE + IF( ICOMPQ.EQ.1 ) THEN + DO 120 J = 2, N + JP = IDXP( J ) + PERM( J ) = IDXQ( IDX( JP )+1 ) + IF( PERM( J ).LE.NLP1 ) THEN + PERM( J ) = PERM( J ) - 1 + END IF + 120 CONTINUE + END IF +* +* The deflated singular values go back into the last N - K slots of +* D. +* + CALL SCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 ) +* +* Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and +* VL(M). 
+* + DSIGMA( 1 ) = ZERO + HLFTOL = TOL / TWO + IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL ) + $ DSIGMA( 2 ) = HLFTOL + IF( M.GT.N ) THEN + Z( 1 ) = SLAPY2( Z1, Z( M ) ) + IF( Z( 1 ).LE.TOL ) THEN + C = ONE + S = ZERO + Z( 1 ) = TOL + ELSE + C = Z1 / Z( 1 ) + S = -Z( M ) / Z( 1 ) + END IF + CALL SROT( 1, VF( M ), 1, VF( 1 ), 1, C, S ) + CALL SROT( 1, VL( M ), 1, VL( 1 ), 1, C, S ) + ELSE + IF( ABS( Z1 ).LE.TOL ) THEN + Z( 1 ) = TOL + ELSE + Z( 1 ) = Z1 + END IF + END IF +* +* Restore Z, VF, and VL. +* + CALL SCOPY( K-1, ZW( 2 ), 1, Z( 2 ), 1 ) + CALL SCOPY( N-1, VFW( 2 ), 1, VF( 2 ), 1 ) + CALL SCOPY( N-1, VLW( 2 ), 1, VL( 2 ), 1 ) +* + RETURN +* +* End of SLASD7 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasd8.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasd8.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,253 @@ + SUBROUTINE SLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR, + $ DSIGMA, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, K, LDDIFR +* .. +* .. Array Arguments .. + REAL D( * ), DIFL( * ), DIFR( LDDIFR, * ), + $ DSIGMA( * ), VF( * ), VL( * ), WORK( * ), + $ Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASD8 finds the square roots of the roots of the secular equation, +* as defined by the values in DSIGMA and Z. It makes the appropriate +* calls to SLASD4, and stores, for each element in D, the distance +* to its two nearest poles (elements in DSIGMA). It also updates +* the arrays VF and VL, the first and last components of all the +* right singular vectors of the original bidiagonal matrix. +* +* SLASD8 is called from SLASD6. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed in +* factored form in the calling routine: +* = 0: Compute singular values only. 
+* = 1: Compute singular vectors in factored form as well. +* +* K (input) INTEGER +* The number of terms in the rational function to be solved +* by SLASD4. K >= 1. +* +* D (output) REAL array, dimension ( K ) +* On output, D contains the updated singular values. +* +* Z (input) REAL array, dimension ( K ) +* The first K elements of this array contain the components +* of the deflation-adjusted updating row vector. +* +* VF (input/output) REAL array, dimension ( K ) +* On entry, VF contains information passed through DBEDE8. +* On exit, VF contains the first K components of the first +* components of all right singular vectors of the bidiagonal +* matrix. +* +* VL (input/output) REAL array, dimension ( K ) +* On entry, VL contains information passed through DBEDE8. +* On exit, VL contains the first K components of the last +* components of all right singular vectors of the bidiagonal +* matrix. +* +* DIFL (output) REAL array, dimension ( K ) +* On exit, DIFL(I) = D(I) - DSIGMA(I). +* +* DIFR (output) REAL array, +* dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and +* dimension ( K ) if ICOMPQ = 0. +* On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not +* defined and will not be referenced. +* +* If ICOMPQ = 1, DIFR(1:K,2) is an array containing the +* normalizing factors for the right singular vector matrix. +* +* LDDIFR (input) INTEGER +* The leading dimension of DIFR, must be at least K. +* +* DSIGMA (input) REAL array, dimension ( K ) +* The first K elements of this array contain the old roots +* of the deflated updating problem. These are the poles +* of the secular equation. +* +* WORK (workspace) REAL array, dimension at least 3 * K +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. 
+* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IWK1, IWK2, IWK2I, IWK3, IWK3I, J + REAL DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, RHO, TEMP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLASCL, SLASD4, SLASET, XERBLA +* .. +* .. External Functions .. + REAL SDOT, SLAMC3, SNRM2 + EXTERNAL SDOT, SLAMC3, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( K.LT.1 ) THEN + INFO = -2 + ELSE IF( LDDIFR.LT.K ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD8', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.1 ) THEN + D( 1 ) = ABS( Z( 1 ) ) + DIFL( 1 ) = D( 1 ) + IF( ICOMPQ.EQ.1 ) THEN + DIFL( 2 ) = ONE + DIFR( 1, 2 ) = ONE + END IF + RETURN + END IF +* +* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), +* which on any of these machines zeros out the bottommost +* bit of DSIGMA(I) if it is 1; this makes the subsequent +* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DSIGMA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DSIGMA(I). 
It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DSIGMA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, K + DSIGMA( I ) = SLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I ) + 10 CONTINUE +* +* Book keeping. +* + IWK1 = 1 + IWK2 = IWK1 + K + IWK3 = IWK2 + K + IWK2I = IWK2 - 1 + IWK3I = IWK3 - 1 +* +* Normalize Z. +* + RHO = SNRM2( K, Z, 1 ) + CALL SLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO ) + RHO = RHO*RHO +* +* Initialize WORK(IWK3). +* + CALL SLASET( 'A', K, 1, ONE, ONE, WORK( IWK3 ), K ) +* +* Compute the updated singular values, the arrays DIFL, DIFR, +* and the updated Z. +* + DO 40 J = 1, K + CALL SLASD4( K, J, DSIGMA, Z, WORK( IWK1 ), RHO, D( J ), + $ WORK( IWK2 ), INFO ) +* +* If the root finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + WORK( IWK3I+J ) = WORK( IWK3I+J )*WORK( J )*WORK( IWK2I+J ) + DIFL( J ) = -WORK( J ) + DIFR( J, 1 ) = -WORK( J+1 ) + DO 20 I = 1, J - 1 + WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )* + $ WORK( IWK2I+I ) / ( DSIGMA( I )- + $ DSIGMA( J ) ) / ( DSIGMA( I )+ + $ DSIGMA( J ) ) + 20 CONTINUE + DO 30 I = J + 1, K + WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )* + $ WORK( IWK2I+I ) / ( DSIGMA( I )- + $ DSIGMA( J ) ) / ( DSIGMA( I )+ + $ DSIGMA( J ) ) + 30 CONTINUE + 40 CONTINUE +* +* Compute updated Z. +* + DO 50 I = 1, K + Z( I ) = SIGN( SQRT( ABS( WORK( IWK3I+I ) ) ), Z( I ) ) + 50 CONTINUE +* +* Update VF and VL. 
+* + DO 80 J = 1, K + DIFLJ = DIFL( J ) + DJ = D( J ) + DSIGJ = -DSIGMA( J ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -DSIGMA( J+1 ) + END IF + WORK( J ) = -Z( J ) / DIFLJ / ( DSIGMA( J )+DJ ) + DO 60 I = 1, J - 1 + WORK( I ) = Z( I ) / ( SLAMC3( DSIGMA( I ), DSIGJ )-DIFLJ ) + $ / ( DSIGMA( I )+DJ ) + 60 CONTINUE + DO 70 I = J + 1, K + WORK( I ) = Z( I ) / ( SLAMC3( DSIGMA( I ), DSIGJP )+DIFRJ ) + $ / ( DSIGMA( I )+DJ ) + 70 CONTINUE + TEMP = SNRM2( K, WORK, 1 ) + WORK( IWK2I+J ) = SDOT( K, WORK, 1, VF, 1 ) / TEMP + WORK( IWK3I+J ) = SDOT( K, WORK, 1, VL, 1 ) / TEMP + IF( ICOMPQ.EQ.1 ) THEN + DIFR( J, 2 ) = TEMP + END IF + 80 CONTINUE +* + CALL SCOPY( K, WORK( IWK2 ), 1, VF, 1 ) + CALL SCOPY( K, WORK( IWK3 ), 1, VL, 1 ) +* + RETURN +* +* End of SLASD8 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasda.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasda.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,389 @@ + SUBROUTINE SLASDA( ICOMPQ, SMLSIZ, N, SQRE, D, E, U, LDU, VT, K, + $ DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL, + $ PERM, GIVNUM, C, S, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), + $ K( * ), PERM( LDGCOL, * ) + REAL C( * ), D( * ), DIFL( LDU, * ), DIFR( LDU, * ), + $ E( * ), GIVNUM( LDU, * ), POLES( LDU, * ), + $ S( * ), U( LDU, * ), VT( LDU, * ), WORK( * ), + $ Z( LDU, * ) +* .. +* +* Purpose +* ======= +* +* Using a divide and conquer approach, SLASDA computes the singular +* value decomposition (SVD) of a real upper bidiagonal N-by-M matrix +* B with diagonal D and offdiagonal E, where M = N + SQRE. The +* algorithm computes the singular values in the SVD B = U * S * VT. 
+* The orthogonal matrices U and VT are optionally computed in +* compact form. +* +* A related subroutine, SLASD0, computes the singular values and +* the singular vectors in explicit form. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed +* in compact form, as follows +* = 0: Compute singular values only. +* = 1: Compute singular vectors of upper bidiagonal +* matrix in compact form. +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The row dimension of the upper bidiagonal matrix. This is +* also the dimension of the main diagonal array D. +* +* SQRE (input) INTEGER +* Specifies the column dimension of the bidiagonal matrix. +* = 0: The bidiagonal matrix has column dimension M = N; +* = 1: The bidiagonal matrix has column dimension M = N + 1. +* +* D (input/output) REAL array, dimension ( N ) +* On entry D contains the main diagonal of the bidiagonal +* matrix. On exit D, if INFO = 0, contains its singular values. +* +* E (input) REAL array, dimension ( M-1 ) +* Contains the subdiagonal entries of the bidiagonal matrix. +* On exit, E has been destroyed. +* +* U (output) REAL array, +* dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced +* if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left +* singular vector matrices of all subproblems at the bottom +* level. +* +* LDU (input) INTEGER, LDU = > N. +* The leading dimension of arrays U, VT, DIFL, DIFR, POLES, +* GIVNUM, and Z. +* +* VT (output) REAL array, +* dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced +* if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right +* singular vector matrices of all subproblems at the bottom +* level. +* +* K (output) INTEGER array, dimension ( N ) +* if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. 
+* If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th +* secular equation on the computation tree. +* +* DIFL (output) REAL array, dimension ( LDU, NLVL ), +* where NLVL = floor(log_2 (N/SMLSIZ))). +* +* DIFR (output) REAL array, +* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and +* dimension ( N ) if ICOMPQ = 0. +* If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) +* record distances between singular values on the I-th +* level and singular values on the (I -1)-th level, and +* DIFR(1:N, 2 * I ) contains the normalizing factors for +* the right singular vector matrix. See SLASD8 for details. +* +* Z (output) REAL array, +* dimension ( LDU, NLVL ) if ICOMPQ = 1 and +* dimension ( N ) if ICOMPQ = 0. +* The first K elements of Z(1, I) contain the components of +* the deflation-adjusted updating row vector for subproblems +* on the I-th level. +* +* POLES (output) REAL array, +* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced +* if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and +* POLES(1, 2*I) contain the new and old singular values +* involved in the secular equations on the I-th level. +* +* GIVPTR (output) INTEGER array, +* dimension ( N ) if ICOMPQ = 1, and not referenced if +* ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records +* the number of Givens rotations performed on the I-th +* problem on the computation tree. +* +* GIVCOL (output) INTEGER array, +* dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not +* referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, +* GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations +* of Givens rotations performed on the I-th level on the +* computation tree. +* +* LDGCOL (input) INTEGER, LDGCOL = > N. +* The leading dimension of arrays GIVCOL and PERM. +* +* PERM (output) INTEGER array, dimension ( LDGCOL, NLVL ) +* if ICOMPQ = 1, and not referenced +* if ICOMPQ = 0. 
If ICOMPQ = 1, on exit, PERM(1, I) records +* permutations done on the I-th level of the computation tree. +* +* GIVNUM (output) REAL array, +* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not +* referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, +* GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- +* values of Givens rotations performed on the I-th level on +* the computation tree. +* +* C (output) REAL array, +* dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. +* If ICOMPQ = 1 and the I-th subproblem is not square, on exit, +* C( I ) contains the C-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* S (output) REAL array, dimension ( N ) if +* ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 +* and the I-th subproblem is not square, on exit, S( I ) +* contains the S-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* WORK (workspace) REAL array, dimension +* (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). +* +* IWORK (workspace) INTEGER array, dimension (7*N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IDXQ, IDXQI, IM1, INODE, ITEMP, IWK, + $ J, LF, LL, LVL, LVL2, M, NCC, ND, NDB1, NDIML, + $ NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, NRU, + $ NWORK1, NWORK2, SMLSZP, SQREI, VF, VFI, VL, VLI + REAL ALPHA, BETA +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLASD6, SLASDQ, SLASDT, SLASET, XERBLA +* .. +* .. Executable Statements .. 
+* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDU.LT.( N+SQRE ) ) THEN + INFO = -8 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -17 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASDA', -INFO ) + RETURN + END IF +* + M = N + SQRE +* +* If the input matrix is too small, call SLASDQ to find the SVD. +* + IF( N.LE.SMLSIZ ) THEN + IF( ICOMPQ.EQ.0 ) THEN + CALL SLASDQ( 'U', SQRE, N, 0, 0, 0, D, E, VT, LDU, U, LDU, + $ U, LDU, WORK, INFO ) + ELSE + CALL SLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDU, U, LDU, + $ U, LDU, WORK, INFO ) + END IF + RETURN + END IF +* +* Book-keeping and set up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N + IDXQ = NDIMR + N + IWK = IDXQ + N +* + NCC = 0 + NRU = 0 +* + SMLSZP = SMLSIZ + 1 + VF = 1 + VL = VF + M + NWORK1 = VL + M + NWORK2 = NWORK1 + SMLSZP*SMLSZP +* + CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* for the nodes on bottom level of the tree, solve +* their subproblems by SLASDQ. 
+* + NDB1 = ( ND+1 ) / 2 + DO 30 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NLP1 = NL + 1 + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 + IDXQI = IDXQ + NLF - 2 + VFI = VF + NLF - 1 + VLI = VL + NLF - 1 + SQREI = 1 + IF( ICOMPQ.EQ.0 ) THEN + CALL SLASET( 'A', NLP1, NLP1, ZERO, ONE, WORK( NWORK1 ), + $ SMLSZP ) + CALL SLASDQ( 'U', SQREI, NL, NLP1, NRU, NCC, D( NLF ), + $ E( NLF ), WORK( NWORK1 ), SMLSZP, + $ WORK( NWORK2 ), NL, WORK( NWORK2 ), NL, + $ WORK( NWORK2 ), INFO ) + ITEMP = NWORK1 + NL*SMLSZP + CALL SCOPY( NLP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 ) + CALL SCOPY( NLP1, WORK( ITEMP ), 1, WORK( VLI ), 1 ) + ELSE + CALL SLASET( 'A', NL, NL, ZERO, ONE, U( NLF, 1 ), LDU ) + CALL SLASET( 'A', NLP1, NLP1, ZERO, ONE, VT( NLF, 1 ), LDU ) + CALL SLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ), + $ E( NLF ), VT( NLF, 1 ), LDU, U( NLF, 1 ), LDU, + $ U( NLF, 1 ), LDU, WORK( NWORK1 ), INFO ) + CALL SCOPY( NLP1, VT( NLF, 1 ), 1, WORK( VFI ), 1 ) + CALL SCOPY( NLP1, VT( NLF, NLP1 ), 1, WORK( VLI ), 1 ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + DO 10 J = 1, NL + IWORK( IDXQI+J ) = J + 10 CONTINUE + IF( ( I.EQ.ND ) .AND. 
( SQRE.EQ.0 ) ) THEN + SQREI = 0 + ELSE + SQREI = 1 + END IF + IDXQI = IDXQI + NLP1 + VFI = VFI + NLP1 + VLI = VLI + NLP1 + NRP1 = NR + SQREI + IF( ICOMPQ.EQ.0 ) THEN + CALL SLASET( 'A', NRP1, NRP1, ZERO, ONE, WORK( NWORK1 ), + $ SMLSZP ) + CALL SLASDQ( 'U', SQREI, NR, NRP1, NRU, NCC, D( NRF ), + $ E( NRF ), WORK( NWORK1 ), SMLSZP, + $ WORK( NWORK2 ), NR, WORK( NWORK2 ), NR, + $ WORK( NWORK2 ), INFO ) + ITEMP = NWORK1 + ( NRP1-1 )*SMLSZP + CALL SCOPY( NRP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 ) + CALL SCOPY( NRP1, WORK( ITEMP ), 1, WORK( VLI ), 1 ) + ELSE + CALL SLASET( 'A', NR, NR, ZERO, ONE, U( NRF, 1 ), LDU ) + CALL SLASET( 'A', NRP1, NRP1, ZERO, ONE, VT( NRF, 1 ), LDU ) + CALL SLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ), + $ E( NRF ), VT( NRF, 1 ), LDU, U( NRF, 1 ), LDU, + $ U( NRF, 1 ), LDU, WORK( NWORK1 ), INFO ) + CALL SCOPY( NRP1, VT( NRF, 1 ), 1, WORK( VFI ), 1 ) + CALL SCOPY( NRP1, VT( NRF, NRP1 ), 1, WORK( VLI ), 1 ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + DO 20 J = 1, NR + IWORK( IDXQI+J ) = J + 20 CONTINUE + 30 CONTINUE +* +* Now conquer each subproblem bottom-up. +* + J = 2**NLVL + DO 50 LVL = NLVL, 1, -1 + LVL2 = LVL*2 - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. 
+* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 40 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + VFI = VF + NLF - 1 + VLI = VL + NLF - 1 + IDXQI = IDXQ + NLF - 1 + ALPHA = D( IC ) + BETA = E( IC ) + IF( ICOMPQ.EQ.0 ) THEN + CALL SLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ), + $ WORK( VFI ), WORK( VLI ), ALPHA, BETA, + $ IWORK( IDXQI ), PERM, GIVPTR( 1 ), GIVCOL, + $ LDGCOL, GIVNUM, LDU, POLES, DIFL, DIFR, Z, + $ K( 1 ), C( 1 ), S( 1 ), WORK( NWORK1 ), + $ IWORK( IWK ), INFO ) + ELSE + J = J - 1 + CALL SLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ), + $ WORK( VFI ), WORK( VLI ), ALPHA, BETA, + $ IWORK( IDXQI ), PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, + $ POLES( NLF, LVL2 ), DIFL( NLF, LVL ), + $ DIFR( NLF, LVL2 ), Z( NLF, LVL ), K( J ), + $ C( J ), S( J ), WORK( NWORK1 ), + $ IWORK( IWK ), INFO ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of SLASDA +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasdq.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasdq.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,316 @@ + SUBROUTINE SLASDQ( UPLO, SQRE, N, NCVT, NRU, NCC, D, E, VT, LDVT, + $ U, LDU, C, LDC, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU, SQRE +* .. +* .. Array Arguments .. + REAL C( LDC, * ), D( * ), E( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. 
+*
+* Purpose
+* =======
+*
+* SLASDQ computes the singular value decomposition (SVD) of a real
+* (upper or lower) bidiagonal matrix with diagonal D and offdiagonal
+* E, accumulating the transformations if desired. Letting B denote
+* the input bidiagonal matrix, the algorithm computes orthogonal
+* matrices Q and P such that B = Q * S * P' (P' denotes the transpose
+* of P). The singular values S are overwritten on D.
+*
+* The input matrix U is changed to U * Q if desired.
+* The input matrix VT is changed to P' * VT if desired.
+* The input matrix C is changed to Q' * C if desired.
+*
+* See "Computing Small Singular Values of Bidiagonal Matrices With
+* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
+* LAPACK Working Note #3, for a detailed description of the algorithm.
+*
+* Arguments
+* =========
+*
+* UPLO (input) CHARACTER*1
+* On entry, UPLO specifies whether the input bidiagonal matrix
+* is upper or lower bidiagonal, and whether it is square or
+* not.
+* UPLO = 'U' or 'u' B is upper bidiagonal.
+* UPLO = 'L' or 'l' B is lower bidiagonal.
+*
+* SQRE (input) INTEGER
+* = 0: then the input matrix is N-by-N.
+* = 1: then the input matrix is N-by-(N+1) if UPLO = 'U' and
+* (N+1)-by-N if UPLO = 'L'.
+*
+* The bidiagonal matrix has
+* N = NL + NR + 1 rows and
+* M = N + SQRE >= N columns.
+*
+* N (input) INTEGER
+* On entry, N specifies the number of rows and columns
+* in the matrix. N must be at least 0.
+*
+* NCVT (input) INTEGER
+* On entry, NCVT specifies the number of columns of
+* the matrix VT. NCVT must be at least 0.
+*
+* NRU (input) INTEGER
+* On entry, NRU specifies the number of rows of
+* the matrix U. NRU must be at least 0.
+*
+* NCC (input) INTEGER
+* On entry, NCC specifies the number of columns of
+* the matrix C. NCC must be at least 0.
+*
+* D (input/output) REAL array, dimension (N)
+* On entry, D contains the diagonal entries of the
+* bidiagonal matrix whose SVD is desired. 
On normal exit, +* D contains the singular values in ascending order. +* +* E (input/output) REAL array. +* dimension is (N-1) if SQRE = 0 and N if SQRE = 1. +* On entry, the entries of E contain the offdiagonal entries +* of the bidiagonal matrix whose SVD is desired. On normal +* exit, E will contain 0. If the algorithm does not converge, +* D and E will contain the diagonal and superdiagonal entries +* of a bidiagonal matrix orthogonally equivalent to the one +* given as input. +* +* VT (input/output) REAL array, dimension (LDVT, NCVT) +* On entry, contains a matrix which on exit has been +* premultiplied by P', dimension N-by-NCVT if SQRE = 0 +* and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). +* +* LDVT (input) INTEGER +* On entry, LDVT specifies the leading dimension of VT as +* declared in the calling (sub) program. LDVT must be at +* least 1. If NCVT is nonzero LDVT must also be at least N. +* +* U (input/output) REAL array, dimension (LDU, N) +* On entry, contains a matrix which on exit has been +* postmultiplied by Q, dimension NRU-by-N if SQRE = 0 +* and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). +* +* LDU (input) INTEGER +* On entry, LDU specifies the leading dimension of U as +* declared in the calling (sub) program. LDU must be at +* least max( 1, NRU ) . +* +* C (input/output) REAL array, dimension (LDC, NCC) +* On entry, contains an N-by-NCC matrix which on exit +* has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 +* and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). +* +* LDC (input) INTEGER +* On entry, LDC specifies the leading dimension of C as +* declared in the calling (sub) program. LDC must be at +* least 1. If NCC is nonzero, LDC must also be at least N. +* +* WORK (workspace) REAL array, dimension (4*N) +* Workspace. Only referenced if one of NCVT, NRU, or NCC is +* nonzero, and if N is at least 2. +* +* INFO (output) INTEGER +* On exit, a value of 0 indicates a successful exit. 
+* If INFO < 0, argument number -INFO is illegal. +* If INFO > 0, the algorithm did not converge, and INFO +* specifies how many superdiagonals did not converge. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ROTATE + INTEGER I, ISUB, IUPLO, J, NP1, SQRE1 + REAL CS, R, SMIN, SN +* .. +* .. External Subroutines .. + EXTERNAL SBDSQR, SLARTG, SLASR, SSWAP, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IUPLO = 0 + IF( LSAME( UPLO, 'U' ) ) + $ IUPLO = 1 + IF( LSAME( UPLO, 'L' ) ) + $ IUPLO = 2 + IF( IUPLO.EQ.0 ) THEN + INFO = -1 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -4 + ELSE IF( NRU.LT.0 ) THEN + INFO = -5 + ELSE IF( NCC.LT.0 ) THEN + INFO = -6 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -12 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASDQ', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) + NP1 = N + 1 + SQRE1 = SQRE +* +* If matrix non-square upper bidiagonal, rotate to be lower +* bidiagonal. The rotations are on the right. +* + IF( ( IUPLO.EQ.1 ) .AND. 
( SQRE1.EQ.1 ) ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( ROTATE ) THEN + WORK( I ) = CS + WORK( N+I ) = SN + END IF + 10 CONTINUE + CALL SLARTG( D( N ), E( N ), CS, SN, R ) + D( N ) = R + E( N ) = ZERO + IF( ROTATE ) THEN + WORK( N ) = CS + WORK( N+N ) = SN + END IF + IUPLO = 2 + SQRE1 = 0 +* +* Update singular vectors if desired. +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', NP1, NCVT, WORK( 1 ), + $ WORK( NP1 ), VT, LDVT ) + END IF +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left. +* + IF( IUPLO.EQ.2 ) THEN + DO 20 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( ROTATE ) THEN + WORK( I ) = CS + WORK( N+I ) = SN + END IF + 20 CONTINUE +* +* If matrix (N+1)-by-N lower bidiagonal, one additional +* rotation is needed. +* + IF( SQRE1.EQ.1 ) THEN + CALL SLARTG( D( N ), E( N ), CS, SN, R ) + D( N ) = R + IF( ROTATE ) THEN + WORK( N ) = CS + WORK( N+N ) = SN + END IF + END IF +* +* Update singular vectors if desired. +* + IF( NRU.GT.0 ) THEN + IF( SQRE1.EQ.0 ) THEN + CALL SLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), + $ WORK( NP1 ), U, LDU ) + ELSE + CALL SLASR( 'R', 'V', 'F', NRU, NP1, WORK( 1 ), + $ WORK( NP1 ), U, LDU ) + END IF + END IF + IF( NCC.GT.0 ) THEN + IF( SQRE1.EQ.0 ) THEN + CALL SLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), + $ WORK( NP1 ), C, LDC ) + ELSE + CALL SLASR( 'L', 'V', 'F', NP1, NCC, WORK( 1 ), + $ WORK( NP1 ), C, LDC ) + END IF + END IF + END IF +* +* Call SBDSQR to compute the SVD of the reduced real +* N-by-N upper bidiagonal matrix. +* + CALL SBDSQR( 'U', N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, + $ LDC, WORK, INFO ) +* +* Sort the singular values into ascending order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 40 I = 1, N +* +* Scan for smallest D(I). 
+* + ISUB = I + SMIN = D( I ) + DO 30 J = I + 1, N + IF( D( J ).LT.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 30 CONTINUE + IF( ISUB.NE.I ) THEN +* +* Swap singular values and vectors. +* + D( ISUB ) = D( I ) + D( I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL SSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( I, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SSWAP( NRU, U( 1, ISUB ), 1, U( 1, I ), 1 ) + IF( NCC.GT.0 ) + $ CALL SSWAP( NCC, C( ISUB, 1 ), LDC, C( I, 1 ), LDC ) + END IF + 40 CONTINUE +* + RETURN +* +* End of SLASDQ +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasdt.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasdt.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,105 @@ + SUBROUTINE SLASDT( N, LVL, ND, INODE, NDIML, NDIMR, MSUB ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER LVL, MSUB, N, ND +* .. +* .. Array Arguments .. + INTEGER INODE( * ), NDIML( * ), NDIMR( * ) +* .. +* +* Purpose +* ======= +* +* SLASDT creates a tree of subproblems for bidiagonal divide and +* conquer. +* +* Arguments +* ========= +* +* N (input) INTEGER +* On entry, the number of diagonal elements of the +* bidiagonal matrix. +* +* LVL (output) INTEGER +* On exit, the number of levels on the computation tree. +* +* ND (output) INTEGER +* On exit, the number of nodes on the tree. +* +* INODE (output) INTEGER array, dimension ( N ) +* On exit, centers of subproblems. +* +* NDIML (output) INTEGER array, dimension ( N ) +* On exit, row dimensions of left children. +* +* NDIMR (output) INTEGER array, dimension ( N ) +* On exit, row dimensions of right children. +* +* MSUB (input) INTEGER. +* On entry, the maximum row dimension each subproblem at the +* bottom of the tree can be of. 
+* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IL, IR, LLST, MAXN, NCRNT, NLVL + REAL TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, LOG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Find the number of levels on the tree. +* + MAXN = MAX( 1, N ) + TEMP = LOG( REAL( MAXN ) / REAL( MSUB+1 ) ) / LOG( TWO ) + LVL = INT( TEMP ) + 1 +* + I = N / 2 + INODE( 1 ) = I + 1 + NDIML( 1 ) = I + NDIMR( 1 ) = N - I - 1 + IL = 0 + IR = 1 + LLST = 1 + DO 20 NLVL = 1, LVL - 1 +* +* Constructing the tree at (NLVL+1)-st level. The number of +* nodes created on this level is LLST * 2. +* + DO 10 I = 0, LLST - 1 + IL = IL + 2 + IR = IR + 2 + NCRNT = LLST + I + NDIML( IL ) = NDIML( NCRNT ) / 2 + NDIMR( IL ) = NDIML( NCRNT ) - NDIML( IL ) - 1 + INODE( IL ) = INODE( NCRNT ) - NDIMR( IL ) - 1 + NDIML( IR ) = NDIMR( NCRNT ) / 2 + NDIMR( IR ) = NDIMR( NCRNT ) - NDIML( IR ) - 1 + INODE( IR ) = INODE( NCRNT ) + NDIML( IR ) + 1 + 10 CONTINUE + LLST = LLST*2 + 20 CONTINUE + ND = LLST*2 - 1 +* + RETURN +* +* End of SLASDT +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaset.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaset.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,114 @@ + SUBROUTINE SLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, M, N + REAL ALPHA, BETA +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SLASET initializes an m-by-n matrix A to BETA on the diagonal and +* ALPHA on the offdiagonals. 
+* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the part of the matrix A to be set. +* = 'U': Upper triangular part is set; the strictly lower +* triangular part of A is not changed. +* = 'L': Lower triangular part is set; the strictly upper +* triangular part of A is not changed. +* Otherwise: All of the matrix A is set. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* ALPHA (input) REAL +* The constant to which the offdiagonal elements are to be set. +* +* BETA (input) REAL +* The constant to which the diagonal elements are to be set. +* +* A (input/output) REAL array, dimension (LDA,N) +* On exit, the leading m-by-n submatrix of A is set as follows: +* +* if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, +* if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, +* otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, +* +* and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Set the strictly upper triangular or trapezoidal part of the +* array to ALPHA. +* + DO 20 J = 2, N + DO 10 I = 1, MIN( J-1, M ) + A( I, J ) = ALPHA + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN +* +* Set the strictly lower triangular or trapezoidal part of the +* array to ALPHA. +* + DO 40 J = 1, MIN( M, N ) + DO 30 I = J + 1, M + A( I, J ) = ALPHA + 30 CONTINUE + 40 CONTINUE +* + ELSE +* +* Set the leading m-by-n submatrix to ALPHA. 
+* + DO 60 J = 1, N + DO 50 I = 1, M + A( I, J ) = ALPHA + 50 CONTINUE + 60 CONTINUE + END IF +* +* Set the first min(M,N) diagonal elements to BETA. +* + DO 70 I = 1, MIN( M, N ) + A( I, I ) = BETA + 70 CONTINUE +* + RETURN +* +* End of SLASET +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasq1.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasq1.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,148 @@ + SUBROUTINE SLASQ1( N, D, E, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLASQ1 computes the singular values of a real N-by-N bidiagonal +* matrix with diagonal D and off-diagonal E. The singular values +* are computed to high relative accuracy, in the absence of +* denormalization, underflow and overflow. The algorithm was first +* presented in +* +* "Accurate singular values and differential qd algorithms" by K. V. +* Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, +* 1994, +* +* and the present implementation is described in "An implementation of +* the dqds Algorithm (Positive Case)", LAPACK Working Note. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of rows and columns in the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, D contains the diagonal elements of the +* bidiagonal matrix whose SVD is desired. On normal exit, +* D contains the singular values in decreasing order. +* +* E (input/output) REAL array, dimension (N) +* On entry, elements E(1:N-1) contain the off-diagonal elements +* of the bidiagonal matrix whose SVD is desired. +* On exit, E is overwritten. 
+* +* WORK (workspace) REAL array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm failed +* = 1, a split was marked by a positive value in E +* = 2, current block of Z not diagonalized after 30*N +* iterations (in inner while loop) +* = 3, termination criterion of outer while loop not met +* (program created more than N unreduced blocks) +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO + REAL EPS, SCALE, SAFMIN, SIGMN, SIGMX +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAS2, SLASCL, SLASQ2, SLASRT, XERBLA +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -2 + CALL XERBLA( 'SLASQ1', -INFO ) + RETURN + ELSE IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + D( 1 ) = ABS( D( 1 ) ) + RETURN + ELSE IF( N.EQ.2 ) THEN + CALL SLAS2( D( 1 ), E( 1 ), D( 2 ), SIGMN, SIGMX ) + D( 1 ) = SIGMX + D( 2 ) = SIGMN + RETURN + END IF +* +* Estimate the largest singular value. +* + SIGMX = ZERO + DO 10 I = 1, N - 1 + D( I ) = ABS( D( I ) ) + SIGMX = MAX( SIGMX, ABS( E( I ) ) ) + 10 CONTINUE + D( N ) = ABS( D( N ) ) +* +* Early return if SIGMX is zero (matrix is already diagonal). +* + IF( SIGMX.EQ.ZERO ) THEN + CALL SLASRT( 'D', N, D, IINFO ) + RETURN + END IF +* + DO 20 I = 1, N + SIGMX = MAX( SIGMX, D( I ) ) + 20 CONTINUE +* +* Copy D and E into WORK (in the Z format) and scale (squaring the +* input data makes scaling by a power of the radix pointless). 
+* + EPS = SLAMCH( 'Precision' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SCALE = SQRT( EPS / SAFMIN ) + CALL SCOPY( N, D, 1, WORK( 1 ), 2 ) + CALL SCOPY( N-1, E, 1, WORK( 2 ), 2 ) + CALL SLASCL( 'G', 0, 0, SIGMX, SCALE, 2*N-1, 1, WORK, 2*N-1, + $ IINFO ) +* +* Compute the q's and e's. +* + DO 30 I = 1, 2*N - 1 + WORK( I ) = WORK( I )**2 + 30 CONTINUE + WORK( 2*N ) = ZERO +* + CALL SLASQ2( N, WORK, INFO ) +* + IF( INFO.EQ.0 ) THEN + DO 40 I = 1, N + D( I ) = SQRT( WORK( I ) ) + 40 CONTINUE + CALL SLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, D, N, IINFO ) + END IF +* + RETURN +* +* End of SLASQ1 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasq2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasq2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,448 @@ + SUBROUTINE SLASQ2( N, Z, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* Modified to call SLAZQ3 in place of SLASQ3, 13 Feb 03, SJH. +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASQ2 computes all the eigenvalues of the symmetric positive +* definite tridiagonal matrix associated with the qd array Z to high +* relative accuracy are computed to high relative accuracy, in the +* absence of denormalization, underflow and overflow. +* +* To see the relation of Z to the tridiagonal matrix, let L be a +* unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and +* let U be an upper bidiagonal matrix with 1's above and diagonal +* Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the +* symmetric tridiagonal to which it is similar. +* +* Note : SLASQ2 defines a logical variable, IEEE, which is true +* on machines which follow ieee-754 floating-point standard in their +* handling of infinities and NaNs, and false otherwise. This variable +* is passed to SLAZQ3. 
+* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of rows and columns in the matrix. N >= 0. +* +* Z (workspace) REAL array, dimension (4*N) +* On entry Z holds the qd array. On exit, entries 1 to N hold +* the eigenvalues in decreasing order, Z( 2*N+1 ) holds the +* trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If +* N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) +* holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of +* shifts that failed. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if the i-th argument is a scalar and had an illegal +* value, then INFO = -i, if the i-th argument is an +* array and the j-entry had an illegal value, then +* INFO = -(i*100+j) +* > 0: the algorithm failed +* = 1, a split was marked by a positive value in E +* = 2, current block of Z not diagonalized after 30*N +* iterations (in inner while loop) +* = 3, termination criterion of outer while loop not met +* (program created more than N unreduced blocks) +* +* Further Details +* =============== +* Local Variables: I0:N0 defines a current unreduced segment of Z. +* The shifts are accumulated in SIGMA. Iteration count is in ITER. +* Ping-pong is controlled by PP (alternates between 0 and 1). +* +* ===================================================================== +* +* .. Parameters .. + REAL CBIAS + PARAMETER ( CBIAS = 1.50E0 ) + REAL ZERO, HALF, ONE, TWO, FOUR, HUNDRD + PARAMETER ( ZERO = 0.0E0, HALF = 0.5E0, ONE = 1.0E0, + $ TWO = 2.0E0, FOUR = 4.0E0, HUNDRD = 100.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL IEEE + INTEGER I0, I4, IINFO, IPN4, ITER, IWHILA, IWHILB, K, + $ N0, NBIG, NDIV, NFAIL, PP, SPLT, TTYPE + REAL D, DESIG, DMIN, DMIN1, DMIN2, DN, DN1, DN2, E, + $ EMAX, EMIN, EPS, OLDEMN, QMAX, QMIN, S, SAFMIN, + $ SIGMA, T, TAU, TEMP, TOL, TOL2, TRACE, ZMAX +* .. +* .. External Subroutines .. + EXTERNAL SLAZQ3, SLASRT, XERBLA +* .. +* .. External Functions .. 
+ INTEGER ILAENV + REAL SLAMCH + EXTERNAL ILAENV, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* (in case SLASQ2 is not called by SLASQ1) +* + INFO = 0 + EPS = SLAMCH( 'Precision' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + TOL = EPS*HUNDRD + TOL2 = TOL**2 +* + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SLASQ2', 1 ) + RETURN + ELSE IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN +* +* 1-by-1 case. +* + IF( Z( 1 ).LT.ZERO ) THEN + INFO = -201 + CALL XERBLA( 'SLASQ2', 2 ) + END IF + RETURN + ELSE IF( N.EQ.2 ) THEN +* +* 2-by-2 case. +* + IF( Z( 2 ).LT.ZERO .OR. Z( 3 ).LT.ZERO ) THEN + INFO = -2 + CALL XERBLA( 'SLASQ2', 2 ) + RETURN + ELSE IF( Z( 3 ).GT.Z( 1 ) ) THEN + D = Z( 3 ) + Z( 3 ) = Z( 1 ) + Z( 1 ) = D + END IF + Z( 5 ) = Z( 1 ) + Z( 2 ) + Z( 3 ) + IF( Z( 2 ).GT.Z( 3 )*TOL2 ) THEN + T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) ) + S = Z( 3 )*( Z( 2 ) / T ) + IF( S.LE.T ) THEN + S = Z( 3 )*( Z( 2 ) / ( T*( ONE+SQRT( ONE+S / T ) ) ) ) + ELSE + S = Z( 3 )*( Z( 2 ) / ( T+SQRT( T )*SQRT( T+S ) ) ) + END IF + T = Z( 1 ) + ( S+Z( 2 ) ) + Z( 3 ) = Z( 3 )*( Z( 1 ) / T ) + Z( 1 ) = T + END IF + Z( 2 ) = Z( 3 ) + Z( 6 ) = Z( 2 ) + Z( 1 ) + RETURN + END IF +* +* Check for negative data and compute sums of q's and e's. +* + Z( 2*N ) = ZERO + EMIN = Z( 2 ) + QMAX = ZERO + ZMAX = ZERO + D = ZERO + E = ZERO +* + DO 10 K = 1, 2*( N-1 ), 2 + IF( Z( K ).LT.ZERO ) THEN + INFO = -( 200+K ) + CALL XERBLA( 'SLASQ2', 2 ) + RETURN + ELSE IF( Z( K+1 ).LT.ZERO ) THEN + INFO = -( 200+K+1 ) + CALL XERBLA( 'SLASQ2', 2 ) + RETURN + END IF + D = D + Z( K ) + E = E + Z( K+1 ) + QMAX = MAX( QMAX, Z( K ) ) + EMIN = MIN( EMIN, Z( K+1 ) ) + ZMAX = MAX( QMAX, ZMAX, Z( K+1 ) ) + 10 CONTINUE + IF( Z( 2*N-1 ).LT.ZERO ) THEN + INFO = -( 200+2*N-1 ) + CALL XERBLA( 'SLASQ2', 2 ) + RETURN + END IF + D = D + Z( 2*N-1 ) + QMAX = MAX( QMAX, Z( 2*N-1 ) ) + ZMAX = MAX( QMAX, ZMAX ) +* +* Check for diagonality. 
+* + IF( E.EQ.ZERO ) THEN + DO 20 K = 2, N + Z( K ) = Z( 2*K-1 ) + 20 CONTINUE + CALL SLASRT( 'D', N, Z, IINFO ) + Z( 2*N-1 ) = D + RETURN + END IF +* + TRACE = D + E +* +* Check for zero data. +* + IF( TRACE.EQ.ZERO ) THEN + Z( 2*N-1 ) = ZERO + RETURN + END IF +* +* Check whether the machine is IEEE conformable. +* + IEEE = ILAENV( 10, 'SLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND. + $ ILAENV( 11, 'SLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 +* +* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). +* + DO 30 K = 2*N, 2, -2 + Z( 2*K ) = ZERO + Z( 2*K-1 ) = Z( K ) + Z( 2*K-2 ) = ZERO + Z( 2*K-3 ) = Z( K-1 ) + 30 CONTINUE +* + I0 = 1 + N0 = N +* +* Reverse the qd-array, if warranted. +* + IF( CBIAS*Z( 4*I0-3 ).LT.Z( 4*N0-3 ) ) THEN + IPN4 = 4*( I0+N0 ) + DO 40 I4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( I4-3 ) + Z( I4-3 ) = Z( IPN4-I4-3 ) + Z( IPN4-I4-3 ) = TEMP + TEMP = Z( I4-1 ) + Z( I4-1 ) = Z( IPN4-I4-5 ) + Z( IPN4-I4-5 ) = TEMP + 40 CONTINUE + END IF +* +* Initial split checking via dqd and Li's test. +* + PP = 0 +* + DO 80 K = 1, 2 +* + D = Z( 4*N0+PP-3 ) + DO 50 I4 = 4*( N0-1 ) + PP, 4*I0 + PP, -4 + IF( Z( I4-1 ).LE.TOL2*D ) THEN + Z( I4-1 ) = -ZERO + D = Z( I4-3 ) + ELSE + D = Z( I4-3 )*( D / ( D+Z( I4-1 ) ) ) + END IF + 50 CONTINUE +* +* dqd maps Z to ZZ plus Li's test. +* + EMIN = Z( 4*I0+PP+1 ) + D = Z( 4*I0+PP-3 ) + DO 60 I4 = 4*I0 + PP, 4*( N0-1 ) + PP, 4 + Z( I4-2*PP-2 ) = D + Z( I4-1 ) + IF( Z( I4-1 ).LE.TOL2*D ) THEN + Z( I4-1 ) = -ZERO + Z( I4-2*PP-2 ) = D + Z( I4-2*PP ) = ZERO + D = Z( I4+1 ) + ELSE IF( SAFMIN*Z( I4+1 ).LT.Z( I4-2*PP-2 ) .AND. + $ SAFMIN*Z( I4-2*PP-2 ).LT.Z( I4+1 ) ) THEN + TEMP = Z( I4+1 ) / Z( I4-2*PP-2 ) + Z( I4-2*PP ) = Z( I4-1 )*TEMP + D = D*TEMP + ELSE + Z( I4-2*PP ) = Z( I4+1 )*( Z( I4-1 ) / Z( I4-2*PP-2 ) ) + D = Z( I4+1 )*( D / Z( I4-2*PP-2 ) ) + END IF + EMIN = MIN( EMIN, Z( I4-2*PP ) ) + 60 CONTINUE + Z( 4*N0-PP-2 ) = D +* +* Now find qmax. 
+* + QMAX = Z( 4*I0-PP-2 ) + DO 70 I4 = 4*I0 - PP + 2, 4*N0 - PP - 2, 4 + QMAX = MAX( QMAX, Z( I4 ) ) + 70 CONTINUE +* +* Prepare for the next iteration on K. +* + PP = 1 - PP + 80 CONTINUE +* +* Initialise variables to pass to SLAZQ3 +* + TTYPE = 0 + DMIN1 = ZERO + DMIN2 = ZERO + DN = ZERO + DN1 = ZERO + DN2 = ZERO + TAU = ZERO +* + ITER = 2 + NFAIL = 0 + NDIV = 2*( N0-I0 ) +* + DO 140 IWHILA = 1, N + 1 + IF( N0.LT.1 ) + $ GO TO 150 +* +* While array unfinished do +* +* E(N0) holds the value of SIGMA when submatrix in I0:N0 +* splits from the rest of the array, but is negated. +* + DESIG = ZERO + IF( N0.EQ.N ) THEN + SIGMA = ZERO + ELSE + SIGMA = -Z( 4*N0-1 ) + END IF + IF( SIGMA.LT.ZERO ) THEN + INFO = 1 + RETURN + END IF +* +* Find last unreduced submatrix's top index I0, find QMAX and +* EMIN. Find Gershgorin-type bound if Q's much greater than E's. +* + EMAX = ZERO + IF( N0.GT.I0 ) THEN + EMIN = ABS( Z( 4*N0-5 ) ) + ELSE + EMIN = ZERO + END IF + QMIN = Z( 4*N0-3 ) + QMAX = QMIN + DO 90 I4 = 4*N0, 8, -4 + IF( Z( I4-5 ).LE.ZERO ) + $ GO TO 100 + IF( QMIN.GE.FOUR*EMAX ) THEN + QMIN = MIN( QMIN, Z( I4-3 ) ) + EMAX = MAX( EMAX, Z( I4-5 ) ) + END IF + QMAX = MAX( QMAX, Z( I4-7 )+Z( I4-5 ) ) + EMIN = MIN( EMIN, Z( I4-5 ) ) + 90 CONTINUE + I4 = 4 +* + 100 CONTINUE + I0 = I4 / 4 +* +* Store EMIN for passing to SLAZQ3. +* + Z( 4*N0-1 ) = EMIN +* +* Put -(initial shift) into DMIN. +* + DMIN = -MAX( ZERO, QMIN-TWO*SQRT( QMIN )*SQRT( EMAX ) ) +* +* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. +* + PP = 0 +* + NBIG = 30*( N0-I0+1 ) + DO 120 IWHILB = 1, NBIG + IF( I0.GT.N0 ) + $ GO TO 130 +* +* While submatrix unfinished take a good dqds step. +* + CALL SLAZQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, + $ ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1, + $ DN2, TAU ) +* + PP = 1 - PP +* +* When EMIN is very small check for splits. +* + IF( PP.EQ.0 .AND. N0-I0.GE.3 ) THEN + IF( Z( 4*N0 ).LE.TOL2*QMAX .OR. 
+ $ Z( 4*N0-1 ).LE.TOL2*SIGMA ) THEN + SPLT = I0 - 1 + QMAX = Z( 4*I0-3 ) + EMIN = Z( 4*I0-1 ) + OLDEMN = Z( 4*I0 ) + DO 110 I4 = 4*I0, 4*( N0-3 ), 4 + IF( Z( I4 ).LE.TOL2*Z( I4-3 ) .OR. + $ Z( I4-1 ).LE.TOL2*SIGMA ) THEN + Z( I4-1 ) = -SIGMA + SPLT = I4 / 4 + QMAX = ZERO + EMIN = Z( I4+3 ) + OLDEMN = Z( I4+4 ) + ELSE + QMAX = MAX( QMAX, Z( I4+1 ) ) + EMIN = MIN( EMIN, Z( I4-1 ) ) + OLDEMN = MIN( OLDEMN, Z( I4 ) ) + END IF + 110 CONTINUE + Z( 4*N0-1 ) = EMIN + Z( 4*N0 ) = OLDEMN + I0 = SPLT + 1 + END IF + END IF +* + 120 CONTINUE +* + INFO = 2 + RETURN +* +* end IWHILB +* + 130 CONTINUE +* + 140 CONTINUE +* + INFO = 3 + RETURN +* +* end IWHILA +* + 150 CONTINUE +* +* Move q's to the front. +* + DO 160 K = 2, N + Z( K ) = Z( 4*K-3 ) + 160 CONTINUE +* +* Sort and compute sum of eigenvalues. +* + CALL SLASRT( 'D', N, Z, IINFO ) +* + E = ZERO + DO 170 K = N, 1, -1 + E = E + Z( K ) + 170 CONTINUE +* +* Store trace, sum(eigenvalues) and information on performance. +* + Z( 2*N+1 ) = TRACE + Z( 2*N+2 ) = E + Z( 2*N+3 ) = REAL( ITER ) + Z( 2*N+4 ) = REAL( NDIV ) / REAL( N**2 ) + Z( 2*N+5 ) = HUNDRD*NFAIL / REAL( ITER ) + RETURN +* +* End of SLASQ2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasq3.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasq3.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,295 @@ + SUBROUTINE SLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, + $ ITER, NDIV, IEEE ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER I0, ITER, N0, NDIV, NFAIL, PP + REAL DESIG, DMIN, QMAX, SIGMA +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. +* In case of failure it changes shifts, and tries again until output +* is positive. 
+* +* Arguments +* ========= +* +* I0 (input) INTEGER +* First index. +* +* N0 (input) INTEGER +* Last index. +* +* Z (input) REAL array, dimension ( 4*N ) +* Z holds the qd array. +* +* PP (input) INTEGER +* PP=0 for ping, PP=1 for pong. +* +* DMIN (output) REAL +* Minimum value of d. +* +* SIGMA (output) REAL +* Sum of shifts used in current segment. +* +* DESIG (input/output) REAL +* Lower order part of SIGMA +* +* QMAX (input) REAL +* Maximum value of q. +* +* NFAIL (output) INTEGER +* Number of times shift was too big. +* +* ITER (output) INTEGER +* Number of iterations. +* +* NDIV (output) INTEGER +* Number of divisions. +* +* TTYPE (output) INTEGER +* Shift type. +* +* IEEE (input) LOGICAL +* Flag for IEEE or non IEEE arithmetic (passed to SLASQ5). +* +* ===================================================================== +* +* .. Parameters .. + REAL CBIAS + PARAMETER ( CBIAS = 1.50E0 ) + REAL ZERO, QURTR, HALF, ONE, TWO, HUNDRD + PARAMETER ( ZERO = 0.0E0, QURTR = 0.250E0, HALF = 0.5E0, + $ ONE = 1.0E0, TWO = 2.0E0, HUNDRD = 100.0E0 ) +* .. +* .. Local Scalars .. + INTEGER IPN4, J4, N0IN, NN, TTYPE + REAL DMIN1, DMIN2, DN, DN1, DN2, EPS, S, SAFMIN, T, + $ TAU, TEMP, TOL, TOL2 +* .. +* .. External Subroutines .. + EXTERNAL SLASQ4, SLASQ5, SLASQ6 +* .. +* .. External Function .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Save statement .. + SAVE TTYPE + SAVE DMIN1, DMIN2, DN, DN1, DN2, TAU +* .. +* .. Data statement .. + DATA TTYPE / 0 / + DATA DMIN1 / ZERO /, DMIN2 / ZERO /, DN / ZERO /, + $ DN1 / ZERO /, DN2 / ZERO /, TAU / ZERO / +* .. +* .. Executable Statements .. +* + N0IN = N0 + EPS = SLAMCH( 'Precision' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + TOL = EPS*HUNDRD + TOL2 = TOL**2 +* +* Check for deflation. 
+* + 10 CONTINUE +* + IF( N0.LT.I0 ) + $ RETURN + IF( N0.EQ.I0 ) + $ GO TO 20 + NN = 4*N0 + PP + IF( N0.EQ.( I0+1 ) ) + $ GO TO 40 +* +* Check whether E(N0-1) is negligible, 1 eigenvalue. +* + IF( Z( NN-5 ).GT.TOL2*( SIGMA+Z( NN-3 ) ) .AND. + $ Z( NN-2*PP-4 ).GT.TOL2*Z( NN-7 ) ) + $ GO TO 30 +* + 20 CONTINUE +* + Z( 4*N0-3 ) = Z( 4*N0+PP-3 ) + SIGMA + N0 = N0 - 1 + GO TO 10 +* +* Check whether E(N0-2) is negligible, 2 eigenvalues. +* + 30 CONTINUE +* + IF( Z( NN-9 ).GT.TOL2*SIGMA .AND. + $ Z( NN-2*PP-8 ).GT.TOL2*Z( NN-11 ) ) + $ GO TO 50 +* + 40 CONTINUE +* + IF( Z( NN-3 ).GT.Z( NN-7 ) ) THEN + S = Z( NN-3 ) + Z( NN-3 ) = Z( NN-7 ) + Z( NN-7 ) = S + END IF + IF( Z( NN-5 ).GT.Z( NN-3 )*TOL2 ) THEN + T = HALF*( ( Z( NN-7 )-Z( NN-3 ) )+Z( NN-5 ) ) + S = Z( NN-3 )*( Z( NN-5 ) / T ) + IF( S.LE.T ) THEN + S = Z( NN-3 )*( Z( NN-5 ) / + $ ( T*( ONE+SQRT( ONE+S / T ) ) ) ) + ELSE + S = Z( NN-3 )*( Z( NN-5 ) / ( T+SQRT( T )*SQRT( T+S ) ) ) + END IF + T = Z( NN-7 ) + ( S+Z( NN-5 ) ) + Z( NN-3 ) = Z( NN-3 )*( Z( NN-7 ) / T ) + Z( NN-7 ) = T + END IF + Z( 4*N0-7 ) = Z( NN-7 ) + SIGMA + Z( 4*N0-3 ) = Z( NN-3 ) + SIGMA + N0 = N0 - 2 + GO TO 10 +* + 50 CONTINUE +* +* Reverse the qd-array, if warranted. +* + IF( DMIN.LE.ZERO .OR. 
N0.LT.N0IN ) THEN + IF( CBIAS*Z( 4*I0+PP-3 ).LT.Z( 4*N0+PP-3 ) ) THEN + IPN4 = 4*( I0+N0 ) + DO 60 J4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( J4-3 ) + Z( J4-3 ) = Z( IPN4-J4-3 ) + Z( IPN4-J4-3 ) = TEMP + TEMP = Z( J4-2 ) + Z( J4-2 ) = Z( IPN4-J4-2 ) + Z( IPN4-J4-2 ) = TEMP + TEMP = Z( J4-1 ) + Z( J4-1 ) = Z( IPN4-J4-5 ) + Z( IPN4-J4-5 ) = TEMP + TEMP = Z( J4 ) + Z( J4 ) = Z( IPN4-J4-4 ) + Z( IPN4-J4-4 ) = TEMP + 60 CONTINUE + IF( N0-I0.LE.4 ) THEN + Z( 4*N0+PP-1 ) = Z( 4*I0+PP-1 ) + Z( 4*N0-PP ) = Z( 4*I0-PP ) + END IF + DMIN2 = MIN( DMIN2, Z( 4*N0+PP-1 ) ) + Z( 4*N0+PP-1 ) = MIN( Z( 4*N0+PP-1 ), Z( 4*I0+PP-1 ), + $ Z( 4*I0+PP+3 ) ) + Z( 4*N0-PP ) = MIN( Z( 4*N0-PP ), Z( 4*I0-PP ), + $ Z( 4*I0-PP+4 ) ) + QMAX = MAX( QMAX, Z( 4*I0+PP-3 ), Z( 4*I0+PP+1 ) ) + DMIN = -ZERO + END IF + END IF +* + IF( DMIN.LT.ZERO .OR. SAFMIN*QMAX.LT.MIN( Z( 4*N0+PP-1 ), + $ Z( 4*N0+PP-9 ), DMIN2+Z( 4*N0-PP ) ) ) THEN +* +* Choose a shift. +* + CALL SLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, DN1, + $ DN2, TAU, TTYPE ) +* +* Call dqds until DMIN > 0. +* + 80 CONTINUE +* + CALL SLASQ5( I0, N0, Z, PP, TAU, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, IEEE ) +* + NDIV = NDIV + ( N0-I0+2 ) + ITER = ITER + 1 +* +* Check status. +* + IF( DMIN.GE.ZERO .AND. DMIN1.GT.ZERO ) THEN +* +* Success. +* + GO TO 100 +* + ELSE IF( DMIN.LT.ZERO .AND. DMIN1.GT.ZERO .AND. + $ Z( 4*( N0-1 )-PP ).LT.TOL*( SIGMA+DN1 ) .AND. + $ ABS( DN ).LT.TOL*SIGMA ) THEN +* +* Convergence hidden by negative DN. +* + Z( 4*( N0-1 )-PP+2 ) = ZERO + DMIN = ZERO + GO TO 100 + ELSE IF( DMIN.LT.ZERO ) THEN +* +* TAU too big. Select new TAU and try again. +* + NFAIL = NFAIL + 1 + IF( TTYPE.LT.-22 ) THEN +* +* Failed twice. Play it safe. +* + TAU = ZERO + ELSE IF( DMIN1.GT.ZERO ) THEN +* +* Late failure. Gives excellent shift. +* + TAU = ( TAU+DMIN )*( ONE-TWO*EPS ) + TTYPE = TTYPE - 11 + ELSE +* +* Early failure. Divide by 4. +* + TAU = QURTR*TAU + TTYPE = TTYPE - 12 + END IF + GO TO 80 + ELSE IF( DMIN.NE.DMIN ) THEN +* +* NaN. 
+* + TAU = ZERO + GO TO 80 + ELSE +* +* Possible underflow. Play it safe. +* + GO TO 90 + END IF + END IF +* +* Risk of underflow. +* + 90 CONTINUE + CALL SLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, DN1, DN2 ) + NDIV = NDIV + ( N0-I0+2 ) + ITER = ITER + 1 + TAU = ZERO +* + 100 CONTINUE + IF( TAU.LT.SIGMA ) THEN + DESIG = DESIG + TAU + T = SIGMA + DESIG + DESIG = DESIG - ( T-SIGMA ) + ELSE + T = SIGMA + TAU + DESIG = SIGMA - ( T-TAU ) + DESIG + END IF + SIGMA = T +* + RETURN +* +* End of SLASQ3 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasq4.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasq4.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,329 @@ + SUBROUTINE SLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, TAU, TTYPE ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER I0, N0, N0IN, PP, TTYPE + REAL DMIN, DMIN1, DMIN2, DN, DN1, DN2, TAU +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASQ4 computes an approximation TAU to the smallest eigenvalue +* using values of d from the previous transform. +* +* I0 (input) INTEGER +* First index. +* +* N0 (input) INTEGER +* Last index. +* +* Z (input) REAL array, dimension ( 4*N ) +* Z holds the qd array. +* +* PP (input) INTEGER +* PP=0 for ping, PP=1 for pong. +* +* N0IN (input) INTEGER +* The value of N0 at start of EIGTEST. +* +* DMIN (input) REAL +* Minimum value of d. +* +* DMIN1 (input) REAL +* Minimum value of d, excluding D( N0 ). +* +* DMIN2 (input) REAL +* Minimum value of d, excluding D( N0 ) and D( N0-1 ). +* +* DN (input) REAL +* d(N) +* +* DN1 (input) REAL +* d(N-1) +* +* DN2 (input) REAL +* d(N-2) +* +* TAU (output) REAL +* This is the shift. +* +* TTYPE (output) INTEGER +* Shift type. 
+* +* Further Details +* =============== +* CNST1 = 9/16 +* +* ===================================================================== +* +* .. Parameters .. + REAL CNST1, CNST2, CNST3 + PARAMETER ( CNST1 = 0.5630E0, CNST2 = 1.010E0, + $ CNST3 = 1.050E0 ) + REAL QURTR, THIRD, HALF, ZERO, ONE, TWO, HUNDRD + PARAMETER ( QURTR = 0.250E0, THIRD = 0.3330E0, + $ HALF = 0.50E0, ZERO = 0.0E0, ONE = 1.0E0, + $ TWO = 2.0E0, HUNDRD = 100.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I4, NN, NP + REAL A2, B1, B2, G, GAM, GAP1, GAP2, S +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Save statement .. + SAVE G +* .. +* .. Data statement .. + DATA G / ZERO / +* .. +* .. Executable Statements .. +* +* A negative DMIN forces the shift to take that absolute value +* TTYPE records the type of shift. +* + IF( DMIN.LE.ZERO ) THEN + TAU = -DMIN + TTYPE = -1 + RETURN + END IF +* + NN = 4*N0 + PP + IF( N0IN.EQ.N0 ) THEN +* +* No eigenvalues deflated. +* + IF( DMIN.EQ.DN .OR. DMIN.EQ.DN1 ) THEN +* + B1 = SQRT( Z( NN-3 ) )*SQRT( Z( NN-5 ) ) + B2 = SQRT( Z( NN-7 ) )*SQRT( Z( NN-9 ) ) + A2 = Z( NN-7 ) + Z( NN-5 ) +* +* Cases 2 and 3. +* + IF( DMIN.EQ.DN .AND. DMIN1.EQ.DN1 ) THEN + GAP2 = DMIN2 - A2 - DMIN2*QURTR + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2 ) THEN + GAP1 = A2 - DN - ( B2 / GAP2 )*B2 + ELSE + GAP1 = A2 - DN - ( B1+B2 ) + END IF + IF( GAP1.GT.ZERO .AND. GAP1.GT.B1 ) THEN + S = MAX( DN-( B1 / GAP1 )*B1, HALF*DMIN ) + TTYPE = -2 + ELSE + S = ZERO + IF( DN.GT.B1 ) + $ S = DN - B1 + IF( A2.GT.( B1+B2 ) ) + $ S = MIN( S, A2-( B1+B2 ) ) + S = MAX( S, THIRD*DMIN ) + TTYPE = -3 + END IF + ELSE +* +* Case 4. +* + TTYPE = -4 + S = QURTR*DMIN + IF( DMIN.EQ.DN ) THEN + GAM = DN + A2 = ZERO + IF( Z( NN-5 ) .GT. Z( NN-7 ) ) + $ RETURN + B2 = Z( NN-5 ) / Z( NN-7 ) + NP = NN - 9 + ELSE + NP = NN - 2*PP + B2 = Z( NP-2 ) + GAM = DN1 + IF( Z( NP-4 ) .GT. Z( NP-2 ) ) + $ RETURN + A2 = Z( NP-4 ) / Z( NP-2 ) + IF( Z( NN-9 ) .GT. 
Z( NN-11 ) ) + $ RETURN + B2 = Z( NN-9 ) / Z( NN-11 ) + NP = NN - 13 + END IF +* +* Approximate contribution to norm squared from I < NN-1. +* + A2 = A2 + B2 + DO 10 I4 = NP, 4*I0 - 1 + PP, -4 + IF( B2.EQ.ZERO ) + $ GO TO 20 + B1 = B2 + IF( Z( I4 ) .GT. Z( I4-2 ) ) + $ RETURN + B2 = B2*( Z( I4 ) / Z( I4-2 ) ) + A2 = A2 + B2 + IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) + $ GO TO 20 + 10 CONTINUE + 20 CONTINUE + A2 = CNST3*A2 +* +* Rayleigh quotient residual bound. +* + IF( A2.LT.CNST1 ) + $ S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 ) + END IF + ELSE IF( DMIN.EQ.DN2 ) THEN +* +* Case 5. +* + TTYPE = -5 + S = QURTR*DMIN +* +* Compute contribution to norm squared from I > NN-2. +* + NP = NN - 2*PP + B1 = Z( NP-2 ) + B2 = Z( NP-6 ) + GAM = DN2 + IF( Z( NP-8 ).GT.B2 .OR. Z( NP-4 ).GT.B1 ) + $ RETURN + A2 = ( Z( NP-8 ) / B2 )*( ONE+Z( NP-4 ) / B1 ) +* +* Approximate contribution to norm squared from I < NN-2. +* + IF( N0-I0.GT.2 ) THEN + B2 = Z( NN-13 ) / Z( NN-15 ) + A2 = A2 + B2 + DO 30 I4 = NN - 17, 4*I0 - 1 + PP, -4 + IF( B2.EQ.ZERO ) + $ GO TO 40 + B1 = B2 + IF( Z( I4 ) .GT. Z( I4-2 ) ) + $ RETURN + B2 = B2*( Z( I4 ) / Z( I4-2 ) ) + A2 = A2 + B2 + IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) + $ GO TO 40 + 30 CONTINUE + 40 CONTINUE + A2 = CNST3*A2 + END IF +* + IF( A2.LT.CNST1 ) + $ S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 ) + ELSE +* +* Case 6, no information to guide us. +* + IF( TTYPE.EQ.-6 ) THEN + G = G + THIRD*( ONE-G ) + ELSE IF( TTYPE.EQ.-18 ) THEN + G = QURTR*THIRD + ELSE + G = QURTR + END IF + S = G*DMIN + TTYPE = -6 + END IF +* + ELSE IF( N0IN.EQ.( N0+1 ) ) THEN +* +* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. +* + IF( DMIN1.EQ.DN1 .AND. DMIN2.EQ.DN2 ) THEN +* +* Cases 7 and 8. 
+* + TTYPE = -7 + S = THIRD*DMIN1 + IF( Z( NN-5 ).GT.Z( NN-7 ) ) + $ RETURN + B1 = Z( NN-5 ) / Z( NN-7 ) + B2 = B1 + IF( B2.EQ.ZERO ) + $ GO TO 60 + DO 50 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4 + A2 = B1 + IF( Z( I4 ).GT.Z( I4-2 ) ) + $ RETURN + B1 = B1*( Z( I4 ) / Z( I4-2 ) ) + B2 = B2 + B1 + IF( HUNDRD*MAX( B1, A2 ).LT.B2 ) + $ GO TO 60 + 50 CONTINUE + 60 CONTINUE + B2 = SQRT( CNST3*B2 ) + A2 = DMIN1 / ( ONE+B2**2 ) + GAP2 = HALF*DMIN2 - A2 + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN + S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) ) + ELSE + S = MAX( S, A2*( ONE-CNST2*B2 ) ) + TTYPE = -8 + END IF + ELSE +* +* Case 9. +* + S = QURTR*DMIN1 + IF( DMIN1.EQ.DN1 ) + $ S = HALF*DMIN1 + TTYPE = -9 + END IF +* + ELSE IF( N0IN.EQ.( N0+2 ) ) THEN +* +* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. +* +* Cases 10 and 11. +* + IF( DMIN2.EQ.DN2 .AND. TWO*Z( NN-5 ).LT.Z( NN-7 ) ) THEN + TTYPE = -10 + S = THIRD*DMIN2 + IF( Z( NN-5 ).GT.Z( NN-7 ) ) + $ RETURN + B1 = Z( NN-5 ) / Z( NN-7 ) + B2 = B1 + IF( B2.EQ.ZERO ) + $ GO TO 80 + DO 70 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4 + IF( Z( I4 ).GT.Z( I4-2 ) ) + $ RETURN + B1 = B1*( Z( I4 ) / Z( I4-2 ) ) + B2 = B2 + B1 + IF( HUNDRD*B1.LT.B2 ) + $ GO TO 80 + 70 CONTINUE + 80 CONTINUE + B2 = SQRT( CNST3*B2 ) + A2 = DMIN2 / ( ONE+B2**2 ) + GAP2 = Z( NN-7 ) + Z( NN-9 ) - + $ SQRT( Z( NN-11 ) )*SQRT( Z( NN-9 ) ) - A2 + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN + S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) ) + ELSE + S = MAX( S, A2*( ONE-CNST2*B2 ) ) + END IF + ELSE + S = QURTR*DMIN2 + TTYPE = -11 + END IF + ELSE IF( N0IN.GT.( N0+2 ) ) THEN +* +* Case 12, more than two eigenvalues deflated. No information. 
+* + S = ZERO + TTYPE = -12 + END IF +* + TAU = S + RETURN +* +* End of SLASQ4 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasq5.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasq5.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,195 @@ + SUBROUTINE SLASQ5( I0, N0, Z, PP, TAU, DMIN, DMIN1, DMIN2, DN, + $ DNM1, DNM2, IEEE ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER I0, N0, PP + REAL DMIN, DMIN1, DMIN2, DN, DNM1, DNM2, TAU +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASQ5 computes one dqds transform in ping-pong form, one +* version for IEEE machines another for non IEEE machines. +* +* Arguments +* ========= +* +* I0 (input) INTEGER +* First index. +* +* N0 (input) INTEGER +* Last index. +* +* Z (input) REAL array, dimension ( 4*N ) +* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid +* an extra argument. +* +* PP (input) INTEGER +* PP=0 for ping, PP=1 for pong. +* +* TAU (input) REAL +* This is the shift. +* +* DMIN (output) REAL +* Minimum value of d. +* +* DMIN1 (output) REAL +* Minimum value of d, excluding D( N0 ). +* +* DMIN2 (output) REAL +* Minimum value of d, excluding D( N0 ) and D( N0-1 ). +* +* DN (output) REAL +* d(N0), the last value of d. +* +* DNM1 (output) REAL +* d(N0-1). +* +* DNM2 (output) REAL +* d(N0-2). +* +* IEEE (input) LOGICAL +* Flag for IEEE or non IEEE arithmetic. +* +* ===================================================================== +* +* .. Parameter .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER J4, J4P2 + REAL D, EMIN, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. 
+* + IF( ( N0-I0-1 ).LE.0 ) + $ RETURN +* + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) - TAU + DMIN = D + DMIN1 = -Z( J4 ) +* + IF( IEEE ) THEN +* +* Code for IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 10 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + TEMP = Z( J4+1 ) / Z( J4-2 ) + D = D*TEMP - TAU + DMIN = MIN( DMIN, D ) + Z( J4 ) = Z( J4-1 )*TEMP + EMIN = MIN( Z( J4 ), EMIN ) + 10 CONTINUE + ELSE + DO 20 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + TEMP = Z( J4+2 ) / Z( J4-3 ) + D = D*TEMP - TAU + DMIN = MIN( DMIN, D ) + Z( J4-1 ) = Z( J4 )*TEMP + EMIN = MIN( Z( J4-1 ), EMIN ) + 20 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DN ) +* + ELSE +* +* Code for non IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 30 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 30 CONTINUE + ELSE + DO 40 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) - TAU + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 40 CONTINUE + END IF +* +* Unroll last two steps. 
+* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( DNM2.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( DNM1.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DN ) +* + END IF +* + Z( J4+2 ) = DN + Z( 4*N0-PP ) = EMIN + RETURN +* +* End of SLASQ5 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasq6.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasq6.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,175 @@ + SUBROUTINE SLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, + $ DNM1, DNM2 ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER I0, N0, PP + REAL DMIN, DMIN1, DMIN2, DN, DNM1, DNM2 +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASQ6 computes one dqd (shift equal to zero) transform in +* ping-pong form, with protection against underflow and overflow. +* +* Arguments +* ========= +* +* I0 (input) INTEGER +* First index. +* +* N0 (input) INTEGER +* Last index. +* +* Z (input) REAL array, dimension ( 4*N ) +* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid +* an extra argument. +* +* PP (input) INTEGER +* PP=0 for ping, PP=1 for pong. +* +* DMIN (output) REAL +* Minimum value of d. +* +* DMIN1 (output) REAL +* Minimum value of d, excluding D( N0 ). +* +* DMIN2 (output) REAL +* Minimum value of d, excluding D( N0 ) and D( N0-1 ). +* +* DN (output) REAL +* d(N0), the last value of d. +* +* DNM1 (output) REAL +* d(N0-1). +* +* DNM2 (output) REAL +* d(N0-2). 
+* +* ===================================================================== +* +* .. Parameter .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER J4, J4P2 + REAL D, EMIN, SAFMIN, TEMP +* .. +* .. External Function .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( N0-I0-1 ).LE.0 ) + $ RETURN +* + SAFMIN = SLAMCH( 'Safe minimum' ) + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) + DMIN = D +* + IF( PP.EQ.0 ) THEN + DO 10 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + D = Z( J4+1 ) + DMIN = D + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4+1 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4+1 ) ) THEN + TEMP = Z( J4+1 ) / Z( J4-2 ) + Z( J4 ) = Z( J4-1 )*TEMP + D = D*TEMP + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 10 CONTINUE + ELSE + DO 20 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( Z( J4-3 ).EQ.ZERO ) THEN + Z( J4-1 ) = ZERO + D = Z( J4+2 ) + DMIN = D + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4+2 ).LT.Z( J4-3 ) .AND. + $ SAFMIN*Z( J4-3 ).LT.Z( J4+2 ) ) THEN + TEMP = Z( J4+2 ) / Z( J4-3 ) + Z( J4-1 ) = Z( J4 )*TEMP + D = D*TEMP + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 20 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + DNM1 = Z( J4P2+2 ) + DMIN = DNM1 + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND. 
+ $ SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN + TEMP = Z( J4P2+2 ) / Z( J4-2 ) + Z( J4 ) = Z( J4P2 )*TEMP + DNM1 = DNM2*TEMP + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + DN = Z( J4P2+2 ) + DMIN = DN + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN + TEMP = Z( J4P2+2 ) / Z( J4-2 ) + Z( J4 ) = Z( J4P2 )*TEMP + DN = DNM1*TEMP + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, DN ) +* + Z( J4+2 ) = DN + Z( 4*N0-PP ) = EMIN + RETURN +* +* End of SLASQ6 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,361 @@ + SUBROUTINE SLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, PIVOT, SIDE + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( * ), S( * ) +* .. +* +* Purpose +* ======= +* +* SLASR applies a sequence of plane rotations to a real matrix A, +* from either the left or the right. +* +* When SIDE = 'L', the transformation takes the form +* +* A := P*A +* +* and when SIDE = 'R', the transformation takes the form +* +* A := A*P**T +* +* where P is an orthogonal matrix consisting of a sequence of z plane +* rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', +* and P**T is the transpose of P. +* +* When DIRECT = 'F' (Forward sequence), then +* +* P = P(z-1) * ... * P(2) * P(1) +* +* and when DIRECT = 'B' (Backward sequence), then +* +* P = P(1) * P(2) * ... 
* P(z-1) +* +* where P(k) is a plane rotation matrix defined by the 2-by-2 rotation +* +* R(k) = ( c(k) s(k) ) +* = ( -s(k) c(k) ). +* +* When PIVOT = 'V' (Variable pivot), the rotation is performed +* for the plane (k,k+1), i.e., P(k) has the form +* +* P(k) = ( 1 ) +* ( ... ) +* ( 1 ) +* ( c(k) s(k) ) +* ( -s(k) c(k) ) +* ( 1 ) +* ( ... ) +* ( 1 ) +* +* where R(k) appears as a rank-2 modification to the identity matrix in +* rows and columns k and k+1. +* +* When PIVOT = 'T' (Top pivot), the rotation is performed for the +* plane (1,k+1), so P(k) has the form +* +* P(k) = ( c(k) s(k) ) +* ( 1 ) +* ( ... ) +* ( 1 ) +* ( -s(k) c(k) ) +* ( 1 ) +* ( ... ) +* ( 1 ) +* +* where R(k) appears in rows and columns 1 and k+1. +* +* Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is +* performed for the plane (k,z), giving P(k) the form +* +* P(k) = ( 1 ) +* ( ... ) +* ( 1 ) +* ( c(k) s(k) ) +* ( 1 ) +* ( ... ) +* ( 1 ) +* ( -s(k) c(k) ) +* +* where R(k) appears in rows and columns k and z. The rotations are +* performed without ever forming P(k) explicitly. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* Specifies whether the plane rotation matrix P is applied to +* A on the left or the right. +* = 'L': Left, compute A := P*A +* = 'R': Right, compute A:= A*P**T +* +* PIVOT (input) CHARACTER*1 +* Specifies the plane for which P(k) is a plane rotation +* matrix. +* = 'V': Variable pivot, the plane (k,k+1) +* = 'T': Top pivot, the plane (1,k+1) +* = 'B': Bottom pivot, the plane (k,z) +* +* DIRECT (input) CHARACTER*1 +* Specifies whether P is a forward or backward sequence of +* plane rotations. +* = 'F': Forward, P = P(z-1)*...*P(2)*P(1) +* = 'B': Backward, P = P(1)*P(2)*...*P(z-1) +* +* M (input) INTEGER +* The number of rows of the matrix A. If m <= 1, an immediate +* return is effected. +* +* N (input) INTEGER +* The number of columns of the matrix A. If n <= 1, an +* immediate return is effected. 
+* +* C (input) REAL array, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* The cosines c(k) of the plane rotations. +* +* S (input) REAL array, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* The sines s(k) of the plane rotations. The 2-by-2 plane +* rotation part of the matrix P(k), R(k), has the form +* R(k) = ( c(k) s(k) ) +* ( -s(k) c(k) ). +* +* A (input/output) REAL array, dimension (LDA,N) +* The M-by-N matrix A. On exit, A is overwritten by P*A if +* SIDE = 'L' or by A*P**T if SIDE = 'R'. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J + REAL CTEMP, STEMP, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN + INFO = 1 + ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT, $ 'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN + INFO = 2 + ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) ) $ THEN + INFO = 3 + ELSE IF( M.LT.0 ) THEN + INFO = 4 + ELSE IF( N.LT.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASR ', INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form P * A +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR.
( STEMP.NE.ZERO ) ) THEN + DO 10 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 40 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 30 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 30 CONTINUE + END IF + 40 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 60 J = 2, M + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 50 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 80 J = M, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 70 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 100 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 90 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 120 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. 
( STEMP.NE.ZERO ) ) THEN + DO 110 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form A * P' +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 140 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 130 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 160 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 150 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 180 J = 2, N + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 170 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 170 CONTINUE + END IF + 180 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 200 J = N, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 190 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 190 CONTINUE + END IF + 200 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 220 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. 
( STEMP.NE.ZERO ) ) THEN + DO 210 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 210 CONTINUE + END IF + 220 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 240 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 230 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 230 CONTINUE + END IF + 240 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of SLASR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasrt.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasrt.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,243 @@ + SUBROUTINE SLASRT( ID, N, D, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER ID + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ) +* .. +* +* Purpose +* ======= +* +* Sort the numbers in D in increasing order (if ID = 'I') or +* in decreasing order (if ID = 'D' ). +* +* Use Quick Sort, reverting to Insertion sort on arrays of +* size <= 20. Dimension of STACK limits N to about 2**32. +* +* Arguments +* ========= +* +* ID (input) CHARACTER*1 +* = 'I': sort D in increasing order; +* = 'D': sort D in decreasing order. +* +* N (input) INTEGER +* The length of the array D. +* +* D (input/output) REAL array, dimension (N) +* On entry, the array to be sorted. +* On exit, D has been sorted into increasing order +* (D(1) <= ... <= D(N) ) or into decreasing order +* (D(1) >= ... >= D(N) ), depending on ID. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER SELECT + PARAMETER ( SELECT = 20 ) +* .. +* .. Local Scalars .. 
+ INTEGER DIR, ENDD, I, J, START, STKPNT + REAL D1, D2, D3, DMNMX, TMP +* .. +* .. Local Arrays .. + INTEGER STACK( 2, 32 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input paramters. +* + INFO = 0 + DIR = -1 + IF( LSAME( ID, 'D' ) ) THEN + DIR = 0 + ELSE IF( LSAME( ID, 'I' ) ) THEN + DIR = 1 + END IF + IF( DIR.EQ.-1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASRT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + STKPNT = 1 + STACK( 1, 1 ) = 1 + STACK( 2, 1 ) = N + 10 CONTINUE + START = STACK( 1, STKPNT ) + ENDD = STACK( 2, STKPNT ) + STKPNT = STKPNT - 1 + IF( ENDD-START.LE.SELECT .AND. ENDD-START.GT.0 ) THEN +* +* Do Insertion sort on D( START:ENDD ) +* + IF( DIR.EQ.0 ) THEN +* +* Sort into decreasing order +* + DO 30 I = START + 1, ENDD + DO 20 J = I, START + 1, -1 + IF( D( J ).GT.D( J-1 ) ) THEN + DMNMX = D( J ) + D( J ) = D( J-1 ) + D( J-1 ) = DMNMX + ELSE + GO TO 30 + END IF + 20 CONTINUE + 30 CONTINUE +* + ELSE +* +* Sort into increasing order +* + DO 50 I = START + 1, ENDD + DO 40 J = I, START + 1, -1 + IF( D( J ).LT.D( J-1 ) ) THEN + DMNMX = D( J ) + D( J ) = D( J-1 ) + D( J-1 ) = DMNMX + ELSE + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + END IF +* + ELSE IF( ENDD-START.GT.SELECT ) THEN +* +* Partition D( START:ENDD ) and stack parts, largest one first +* +* Choose partition entry as median of 3 +* + D1 = D( START ) + D2 = D( ENDD ) + I = ( START+ENDD ) / 2 + D3 = D( I ) + IF( D1.LT.D2 ) THEN + IF( D3.LT.D1 ) THEN + DMNMX = D1 + ELSE IF( D3.LT.D2 ) THEN + DMNMX = D3 + ELSE + DMNMX = D2 + END IF + ELSE + IF( D3.LT.D2 ) THEN + DMNMX = D2 + ELSE IF( D3.LT.D1 ) THEN + DMNMX = D3 + ELSE + DMNMX = D1 + END IF + END IF +* + IF( DIR.EQ.0 ) THEN +* +* Sort into decreasing order +* + I = START - 1 + J = ENDD + 1 + 60 CONTINUE + 
70 CONTINUE + J = J - 1 + IF( D( J ).LT.DMNMX ) + $ GO TO 70 + 80 CONTINUE + I = I + 1 + IF( D( I ).GT.DMNMX ) + $ GO TO 80 + IF( I.LT.J ) THEN + TMP = D( I ) + D( I ) = D( J ) + D( J ) = TMP + GO TO 60 + END IF + IF( J-START.GT.ENDD-J-1 ) THEN + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + ELSE + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + END IF + ELSE +* +* Sort into increasing order +* + I = START - 1 + J = ENDD + 1 + 90 CONTINUE + 100 CONTINUE + J = J - 1 + IF( D( J ).GT.DMNMX ) + $ GO TO 100 + 110 CONTINUE + I = I + 1 + IF( D( I ).LT.DMNMX ) + $ GO TO 110 + IF( I.LT.J ) THEN + TMP = D( I ) + D( I ) = D( J ) + D( J ) = TMP + GO TO 90 + END IF + IF( J-START.GT.ENDD-J-1 ) THEN + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + ELSE + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + END IF + END IF + END IF + IF( STKPNT.GT.0 ) + $ GO TO 10 + RETURN +* +* End of SLASRT +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slassq.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slassq.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,88 @@ + SUBROUTINE SLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL SCALE, SUMSQ +* .. +* .. Array Arguments .. + REAL X( * ) +* .. 
+* +* Purpose +* ======= +* +* SLASSQ returns the values scl and smsq such that +* +* ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +* +* where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is +* assumed to be non-negative and scl returns the value +* +* scl = max( scale, abs( x( i ) ) ). +* +* scale and sumsq must be supplied in SCALE and SUMSQ and +* scl and smsq are overwritten on SCALE and SUMSQ respectively. +* +* The routine makes only one pass through the vector x. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements to be used from the vector X. +* +* X (input) REAL array, dimension (N) +* The vector for which a scaled sum of squares is computed. +* x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +* +* INCX (input) INTEGER +* The increment between successive values of the vector X. +* INCX > 0. +* +* SCALE (input/output) REAL +* On entry, the value scale in the equation above. +* On exit, SCALE is overwritten with scl , the scaling factor +* for the sum of squares. +* +* SUMSQ (input/output) REAL +* On entry, the value sumsq in the equation above. +* On exit, SUMSQ is overwritten with smsq , the basic sum of +* squares from which scl has been factored out. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER IX + REAL ABSXI +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. 
+* + IF( N.GT.0 ) THEN + DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX + IF( X( IX ).NE.ZERO ) THEN + ABSXI = ABS( X( IX ) ) + IF( SCALE.LT.ABSXI ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / ABSXI )**2 + SCALE = ABSXI + ELSE + SUMSQ = SUMSQ + ( ABSXI / SCALE )**2 + END IF + END IF + 10 CONTINUE + END IF + RETURN +* +* End of SLASSQ +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasv2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasv2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,249 @@ + SUBROUTINE SLASV2( F, G, H, SSMIN, SSMAX, SNR, CSR, SNL, CSL ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + REAL CSL, CSR, F, G, H, SNL, SNR, SSMAX, SSMIN +* .. +* +* Purpose +* ======= +* +* SLASV2 computes the singular value decomposition of a 2-by-2 +* triangular matrix +* [ F G ] +* [ 0 H ]. +* On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the +* smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and +* right singular vectors for abs(SSMAX), giving the decomposition +* +* [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] +* [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. +* +* Arguments +* ========= +* +* F (input) REAL +* The (1,1) element of the 2-by-2 matrix. +* +* G (input) REAL +* The (1,2) element of the 2-by-2 matrix. +* +* H (input) REAL +* The (2,2) element of the 2-by-2 matrix. +* +* SSMIN (output) REAL +* abs(SSMIN) is the smaller singular value. +* +* SSMAX (output) REAL +* abs(SSMAX) is the larger singular value. +* +* SNL (output) REAL +* CSL (output) REAL +* The vector (CSL, SNL) is a unit left singular vector for the +* singular value abs(SSMAX). +* +* SNR (output) REAL +* CSR (output) REAL +* The vector (CSR, SNR) is a unit right singular vector for the +* singular value abs(SSMAX). +* +* Further Details +* =============== +* +* Any input parameter may be aliased with any output parameter. 
+* +* Barring over/underflow and assuming a guard digit in subtraction, all +* output quantities are correct to within a few units in the last +* place (ulps). +* +* In IEEE arithmetic, the code works correctly if one matrix element is +* infinite. +* +* Overflow will not occur unless the largest singular value itself +* overflows or is within a few ulps of overflow. (On machines with +* partial overflow, like the Cray, overflow may occur if the largest +* singular value is within a factor of 2 of overflow.) +* +* Underflow is harmless if underflow is gradual. Otherwise, results +* may correspond to a matrix modified by perturbations of size near +* the underflow threshold. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL HALF + PARAMETER ( HALF = 0.5E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) + REAL FOUR + PARAMETER ( FOUR = 4.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL GASMAL, SWAP + INTEGER PMAX + REAL A, CLT, CRT, D, FA, FT, GA, GT, HA, HT, L, M, + $ MM, R, S, SLT, SRT, T, TEMP, TSIGN, TT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Executable Statements .. +* + FT = F + FA = ABS( FT ) + HT = H + HA = ABS( H ) +* +* PMAX points to the maximum absolute element of matrix +* PMAX = 1 if F largest in absolute values +* PMAX = 2 if G largest in absolute values +* PMAX = 3 if H largest in absolute values +* + PMAX = 1 + SWAP = ( HA.GT.FA ) + IF( SWAP ) THEN + PMAX = 3 + TEMP = FT + FT = HT + HT = TEMP + TEMP = FA + FA = HA + HA = TEMP +* +* Now FA .ge. HA +* + END IF + GT = G + GA = ABS( GT ) + IF( GA.EQ.ZERO ) THEN +* +* Diagonal matrix +* + SSMIN = HA + SSMAX = FA + CLT = ONE + CRT = ONE + SLT = ZERO + SRT = ZERO + ELSE + GASMAL = .TRUE. 
+ IF( GA.GT.FA ) THEN + PMAX = 2 + IF( ( FA / GA ).LT.SLAMCH( 'EPS' ) ) THEN +* +* Case of very large GA +* + GASMAL = .FALSE. + SSMAX = GA + IF( HA.GT.ONE ) THEN + SSMIN = FA / ( GA / HA ) + ELSE + SSMIN = ( FA / GA )*HA + END IF + CLT = ONE + SLT = HT / GT + SRT = ONE + CRT = FT / GT + END IF + END IF + IF( GASMAL ) THEN +* +* Normal case +* + D = FA - HA + IF( D.EQ.FA ) THEN +* +* Copes with infinite F or H +* + L = ONE + ELSE + L = D / FA + END IF +* +* Note that 0 .le. L .le. 1 +* + M = GT / FT +* +* Note that abs(M) .le. 1/macheps +* + T = TWO - L +* +* Note that T .ge. 1 +* + MM = M*M + TT = T*T + S = SQRT( TT+MM ) +* +* Note that 1 .le. S .le. 1 + 1/macheps +* + IF( L.EQ.ZERO ) THEN + R = ABS( M ) + ELSE + R = SQRT( L*L+MM ) + END IF +* +* Note that 0 .le. R .le. 1 + 1/macheps +* + A = HALF*( S+R ) +* +* Note that 1 .le. A .le. 1 + abs(M) +* + SSMIN = HA / A + SSMAX = FA*A + IF( MM.EQ.ZERO ) THEN +* +* Note that M is very tiny +* + IF( L.EQ.ZERO ) THEN + T = SIGN( TWO, FT )*SIGN( ONE, GT ) + ELSE + T = GT / SIGN( D, FT ) + M / T + END IF + ELSE + T = ( M / ( S+T )+M / ( R+L ) )*( ONE+A ) + END IF + L = SQRT( T*T+FOUR ) + CRT = TWO / L + SRT = T / L + CLT = ( CRT+SRT*M ) / A + SLT = ( HT / FT )*SRT / A + END IF + END IF + IF( SWAP ) THEN + CSL = SRT + SNL = CRT + CSR = SLT + SNR = CLT + ELSE + CSL = CLT + SNL = SLT + CSR = CRT + SNR = SRT + END IF +* +* Correct signs of SSMAX and SSMIN +* + IF( PMAX.EQ.1 ) + $ TSIGN = SIGN( ONE, CSR )*SIGN( ONE, CSL )*SIGN( ONE, F ) + IF( PMAX.EQ.2 ) + $ TSIGN = SIGN( ONE, SNR )*SIGN( ONE, CSL )*SIGN( ONE, G ) + IF( PMAX.EQ.3 ) + $ TSIGN = SIGN( ONE, SNR )*SIGN( ONE, SNL )*SIGN( ONE, H ) + SSMAX = SIGN( SSMAX, TSIGN ) + SSMIN = SIGN( SSMIN, TSIGN*SIGN( ONE, F )*SIGN( ONE, H ) ) + RETURN +* +* End of SLASV2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slaswp.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slaswp.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,119 @@ + SUBROUTINE SLASWP( N, A, 
LDA, K1, K2, IPIV, INCX ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SLASWP performs a series of row interchanges on the matrix A. +* One row interchange is initiated for each of rows K1 through K2 of A. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of columns of the matrix A. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the matrix of column dimension N to which the row +* interchanges will be applied. +* On exit, the permuted matrix. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* +* K1 (input) INTEGER +* The first element of IPIV for which a row interchange will +* be done. +* +* K2 (input) INTEGER +* The last element of IPIV for which a row interchange will +* be done. +* +* IPIV (input) INTEGER array, dimension (K2*abs(INCX)) +* The vector of pivot indices. Only the elements in positions +* K1 through K2 of IPIV are accessed. +* IPIV(K) = L implies rows K and L are to be interchanged. +* +* INCX (input) INTEGER +* The increment between successive values of IPIV. If IPIV +* is negative, the pivots are applied in reverse order. +* +* Further Details +* =============== +* +* Modified by +* R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, I2, INC, IP, IX, IX0, J, K, N32 + REAL TEMP +* .. +* .. Executable Statements .. +* +* Interchange row I with row IPIV(I) for each of rows K1 through K2. 
+* + IF( INCX.GT.0 ) THEN + IX0 = K1 + I1 = K1 + I2 = K2 + INC = 1 + ELSE IF( INCX.LT.0 ) THEN + IX0 = 1 + ( 1-K2 )*INCX + I1 = K2 + I2 = K1 + INC = -1 + ELSE + RETURN + END IF +* + N32 = ( N / 32 )*32 + IF( N32.NE.0 ) THEN + DO 30 J = 1, N32, 32 + IX = IX0 + DO 20 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 10 K = J, J + 31 + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 10 CONTINUE + END IF + IX = IX + INCX + 20 CONTINUE + 30 CONTINUE + END IF + IF( N32.NE.N ) THEN + N32 = N32 + 1 + IX = IX0 + DO 50 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 40 K = N32, N + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 40 CONTINUE + END IF + IX = IX + INCX + 50 CONTINUE + END IF +* + RETURN +* +* End of SLASWP +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slasy2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slasy2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,381 @@ + SUBROUTINE SLASY2( LTRANL, LTRANR, ISGN, N1, N2, TL, LDTL, TR, + $ LDTR, B, LDB, SCALE, X, LDX, XNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + LOGICAL LTRANL, LTRANR + INTEGER INFO, ISGN, LDB, LDTL, LDTR, LDX, N1, N2 + REAL SCALE, XNORM +* .. +* .. Array Arguments .. + REAL B( LDB, * ), TL( LDTL, * ), TR( LDTR, * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in +* +* op(TL)*X + ISGN*X*op(TR) = SCALE*B, +* +* where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or +* -1. op(T) = T or T', where T' denotes the transpose of T. +* +* Arguments +* ========= +* +* LTRANL (input) LOGICAL +* On entry, LTRANL specifies the op(TL): +* = .FALSE., op(TL) = TL, +* = .TRUE., op(TL) = TL'. +* +* LTRANR (input) LOGICAL +* On entry, LTRANR specifies the op(TR): +* = .FALSE., op(TR) = TR, +* = .TRUE., op(TR) = TR'. 
+* +* ISGN (input) INTEGER +* On entry, ISGN specifies the sign of the equation +* as described before. ISGN may only be 1 or -1. +* +* N1 (input) INTEGER +* On entry, N1 specifies the order of matrix TL. +* N1 may only be 0, 1 or 2. +* +* N2 (input) INTEGER +* On entry, N2 specifies the order of matrix TR. +* N2 may only be 0, 1 or 2. +* +* TL (input) REAL array, dimension (LDTL,2) +* On entry, TL contains an N1 by N1 matrix. +* +* LDTL (input) INTEGER +* The leading dimension of the matrix TL. LDTL >= max(1,N1). +* +* TR (input) REAL array, dimension (LDTR,2) +* On entry, TR contains an N2 by N2 matrix. +* +* LDTR (input) INTEGER +* The leading dimension of the matrix TR. LDTR >= max(1,N2). +* +* B (input) REAL array, dimension (LDB,2) +* On entry, the N1 by N2 matrix B contains the right-hand +* side of the equation. +* +* LDB (input) INTEGER +* The leading dimension of the matrix B. LDB >= max(1,N1). +* +* SCALE (output) REAL +* On exit, SCALE contains the scale factor. SCALE is chosen +* less than or equal to 1 to prevent the solution overflowing. +* +* X (output) REAL array, dimension (LDX,2) +* On exit, X contains the N1 by N2 solution. +* +* LDX (input) INTEGER +* The leading dimension of the matrix X. LDX >= max(1,N1). +* +* XNORM (output) REAL +* On exit, XNORM is the infinity-norm of the solution. +* +* INFO (output) INTEGER +* On exit, INFO is set to +* 0: successful exit. +* 1: TL and TR have too close eigenvalues, so TL or +* TR is perturbed to get a nonsingular equation. +* NOTE: In the interests of speed, this routine does not +* check the inputs for errors. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL TWO, HALF, EIGHT + PARAMETER ( TWO = 2.0E+0, HALF = 0.5E+0, EIGHT = 8.0E+0 ) +* .. +* .. Local Scalars .. 
+ LOGICAL BSWAP, XSWAP + INTEGER I, IP, IPIV, IPSV, J, JP, JPSV, K + REAL BET, EPS, GAM, L21, SGN, SMIN, SMLNUM, TAU1, + $ TEMP, U11, U12, U22, XMAX +* .. +* .. Local Arrays .. + LOGICAL BSWPIV( 4 ), XSWPIV( 4 ) + INTEGER JPIV( 4 ), LOCL21( 4 ), LOCU12( 4 ), + $ LOCU22( 4 ) + REAL BTMP( 4 ), T16( 4, 4 ), TMP( 4 ), X2( 2 ) +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Data statements .. + DATA LOCU12 / 3, 4, 1, 2 / , LOCL21 / 2, 1, 4, 3 / , + $ LOCU22 / 4, 3, 2, 1 / + DATA XSWPIV / .FALSE., .FALSE., .TRUE., .TRUE. / + DATA BSWPIV / .FALSE., .TRUE., .FALSE., .TRUE. / +* .. +* .. Executable Statements .. +* +* Do not check the input parameters for errors +* + INFO = 0 +* +* Quick return if possible +* + IF( N1.EQ.0 .OR. N2.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + SGN = ISGN +* + K = N1 + N1 + N2 - 2 + GO TO ( 10, 20, 30, 50 )K +* +* 1 by 1: TL11*X + SGN*X*TR11 = B11 +* + 10 CONTINUE + TAU1 = TL( 1, 1 ) + SGN*TR( 1, 1 ) + BET = ABS( TAU1 ) + IF( BET.LE.SMLNUM ) THEN + TAU1 = SMLNUM + BET = SMLNUM + INFO = 1 + END IF +* + SCALE = ONE + GAM = ABS( B( 1, 1 ) ) + IF( SMLNUM*GAM.GT.BET ) + $ SCALE = ONE / GAM +* + X( 1, 1 ) = ( B( 1, 1 )*SCALE ) / TAU1 + XNORM = ABS( X( 1, 1 ) ) + RETURN +* +* 1 by 2: +* TL11*[X11 X12] + ISGN*[X11 X12]*op[TR11 TR12] = [B11 B12] +* [TR21 TR22] +* + 20 CONTINUE +* + SMIN = MAX( EPS*MAX( ABS( TL( 1, 1 ) ), ABS( TR( 1, 1 ) ), + $ ABS( TR( 1, 2 ) ), ABS( TR( 2, 1 ) ), ABS( TR( 2, 2 ) ) ), + $ SMLNUM ) + TMP( 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + TMP( 4 ) = TL( 1, 1 ) + SGN*TR( 2, 2 ) + IF( LTRANR ) THEN + TMP( 2 ) = SGN*TR( 2, 1 ) + TMP( 3 ) = SGN*TR( 1, 2 ) + ELSE + TMP( 2 ) = SGN*TR( 1, 2 ) + TMP( 3 ) = SGN*TR( 2, 1 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 1, 2 ) + GO TO 40 +* +* 2 by 1: +* 
op[TL11 TL12]*[X11] + ISGN* [X11]*TR11 = [B11] +* [TL21 TL22] [X21] [X21] [B21] +* + 30 CONTINUE + SMIN = MAX( EPS*MAX( ABS( TR( 1, 1 ) ), ABS( TL( 1, 1 ) ), + $ ABS( TL( 1, 2 ) ), ABS( TL( 2, 1 ) ), ABS( TL( 2, 2 ) ) ), + $ SMLNUM ) + TMP( 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + TMP( 4 ) = TL( 2, 2 ) + SGN*TR( 1, 1 ) + IF( LTRANL ) THEN + TMP( 2 ) = TL( 1, 2 ) + TMP( 3 ) = TL( 2, 1 ) + ELSE + TMP( 2 ) = TL( 2, 1 ) + TMP( 3 ) = TL( 1, 2 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 2, 1 ) + 40 CONTINUE +* +* Solve 2 by 2 system using complete pivoting. +* Set pivots less than SMIN to SMIN. +* + IPIV = ISAMAX( 4, TMP, 1 ) + U11 = TMP( IPIV ) + IF( ABS( U11 ).LE.SMIN ) THEN + INFO = 1 + U11 = SMIN + END IF + U12 = TMP( LOCU12( IPIV ) ) + L21 = TMP( LOCL21( IPIV ) ) / U11 + U22 = TMP( LOCU22( IPIV ) ) - U12*L21 + XSWAP = XSWPIV( IPIV ) + BSWAP = BSWPIV( IPIV ) + IF( ABS( U22 ).LE.SMIN ) THEN + INFO = 1 + U22 = SMIN + END IF + IF( BSWAP ) THEN + TEMP = BTMP( 2 ) + BTMP( 2 ) = BTMP( 1 ) - L21*TEMP + BTMP( 1 ) = TEMP + ELSE + BTMP( 2 ) = BTMP( 2 ) - L21*BTMP( 1 ) + END IF + SCALE = ONE + IF( ( TWO*SMLNUM )*ABS( BTMP( 2 ) ).GT.ABS( U22 ) .OR. + $ ( TWO*SMLNUM )*ABS( BTMP( 1 ) ).GT.ABS( U11 ) ) THEN + SCALE = HALF / MAX( ABS( BTMP( 1 ) ), ABS( BTMP( 2 ) ) ) + BTMP( 1 ) = BTMP( 1 )*SCALE + BTMP( 2 ) = BTMP( 2 )*SCALE + END IF + X2( 2 ) = BTMP( 2 ) / U22 + X2( 1 ) = BTMP( 1 ) / U11 - ( U12 / U11 )*X2( 2 ) + IF( XSWAP ) THEN + TEMP = X2( 2 ) + X2( 2 ) = X2( 1 ) + X2( 1 ) = TEMP + END IF + X( 1, 1 ) = X2( 1 ) + IF( N1.EQ.1 ) THEN + X( 1, 2 ) = X2( 2 ) + XNORM = ABS( X( 1, 1 ) ) + ABS( X( 1, 2 ) ) + ELSE + X( 2, 1 ) = X2( 2 ) + XNORM = MAX( ABS( X( 1, 1 ) ), ABS( X( 2, 1 ) ) ) + END IF + RETURN +* +* 2 by 2: +* op[TL11 TL12]*[X11 X12] +ISGN* [X11 X12]*op[TR11 TR12] = [B11 B12] +* [TL21 TL22] [X21 X22] [X21 X22] [TR21 TR22] [B21 B22] +* +* Solve equivalent 4 by 4 system using complete pivoting. +* Set pivots less than SMIN to SMIN. 
+* + 50 CONTINUE + SMIN = MAX( ABS( TR( 1, 1 ) ), ABS( TR( 1, 2 ) ), + $ ABS( TR( 2, 1 ) ), ABS( TR( 2, 2 ) ) ) + SMIN = MAX( SMIN, ABS( TL( 1, 1 ) ), ABS( TL( 1, 2 ) ), + $ ABS( TL( 2, 1 ) ), ABS( TL( 2, 2 ) ) ) + SMIN = MAX( EPS*SMIN, SMLNUM ) + BTMP( 1 ) = ZERO + CALL SCOPY( 16, BTMP, 0, T16, 1 ) + T16( 1, 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + T16( 2, 2 ) = TL( 2, 2 ) + SGN*TR( 1, 1 ) + T16( 3, 3 ) = TL( 1, 1 ) + SGN*TR( 2, 2 ) + T16( 4, 4 ) = TL( 2, 2 ) + SGN*TR( 2, 2 ) + IF( LTRANL ) THEN + T16( 1, 2 ) = TL( 2, 1 ) + T16( 2, 1 ) = TL( 1, 2 ) + T16( 3, 4 ) = TL( 2, 1 ) + T16( 4, 3 ) = TL( 1, 2 ) + ELSE + T16( 1, 2 ) = TL( 1, 2 ) + T16( 2, 1 ) = TL( 2, 1 ) + T16( 3, 4 ) = TL( 1, 2 ) + T16( 4, 3 ) = TL( 2, 1 ) + END IF + IF( LTRANR ) THEN + T16( 1, 3 ) = SGN*TR( 1, 2 ) + T16( 2, 4 ) = SGN*TR( 1, 2 ) + T16( 3, 1 ) = SGN*TR( 2, 1 ) + T16( 4, 2 ) = SGN*TR( 2, 1 ) + ELSE + T16( 1, 3 ) = SGN*TR( 2, 1 ) + T16( 2, 4 ) = SGN*TR( 2, 1 ) + T16( 3, 1 ) = SGN*TR( 1, 2 ) + T16( 4, 2 ) = SGN*TR( 1, 2 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 2, 1 ) + BTMP( 3 ) = B( 1, 2 ) + BTMP( 4 ) = B( 2, 2 ) +* +* Perform elimination +* + DO 100 I = 1, 3 + XMAX = ZERO + DO 70 IP = I, 4 + DO 60 JP = I, 4 + IF( ABS( T16( IP, JP ) ).GE.XMAX ) THEN + XMAX = ABS( T16( IP, JP ) ) + IPSV = IP + JPSV = JP + END IF + 60 CONTINUE + 70 CONTINUE + IF( IPSV.NE.I ) THEN + CALL SSWAP( 4, T16( IPSV, 1 ), 4, T16( I, 1 ), 4 ) + TEMP = BTMP( I ) + BTMP( I ) = BTMP( IPSV ) + BTMP( IPSV ) = TEMP + END IF + IF( JPSV.NE.I ) + $ CALL SSWAP( 4, T16( 1, JPSV ), 1, T16( 1, I ), 1 ) + JPIV( I ) = JPSV + IF( ABS( T16( I, I ) ).LT.SMIN ) THEN + INFO = 1 + T16( I, I ) = SMIN + END IF + DO 90 J = I + 1, 4 + T16( J, I ) = T16( J, I ) / T16( I, I ) + BTMP( J ) = BTMP( J ) - T16( J, I )*BTMP( I ) + DO 80 K = I + 1, 4 + T16( J, K ) = T16( J, K ) - T16( J, I )*T16( I, K ) + 80 CONTINUE + 90 CONTINUE + 100 CONTINUE + IF( ABS( T16( 4, 4 ) ).LT.SMIN ) + $ T16( 4, 4 ) = SMIN + SCALE = ONE + IF( ( EIGHT*SMLNUM )*ABS( BTMP( 
1 ) ).GT.ABS( T16( 1, 1 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 2 ) ).GT.ABS( T16( 2, 2 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 3 ) ).GT.ABS( T16( 3, 3 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 4 ) ).GT.ABS( T16( 4, 4 ) ) ) THEN + SCALE = ( ONE / EIGHT ) / MAX( ABS( BTMP( 1 ) ), + $ ABS( BTMP( 2 ) ), ABS( BTMP( 3 ) ), ABS( BTMP( 4 ) ) ) + BTMP( 1 ) = BTMP( 1 )*SCALE + BTMP( 2 ) = BTMP( 2 )*SCALE + BTMP( 3 ) = BTMP( 3 )*SCALE + BTMP( 4 ) = BTMP( 4 )*SCALE + END IF + DO 120 I = 1, 4 + K = 5 - I + TEMP = ONE / T16( K, K ) + TMP( K ) = BTMP( K )*TEMP + DO 110 J = K + 1, 4 + TMP( K ) = TMP( K ) - ( TEMP*T16( K, J ) )*TMP( J ) + 110 CONTINUE + 120 CONTINUE + DO 130 I = 1, 3 + IF( JPIV( 4-I ).NE.4-I ) THEN + TEMP = TMP( 4-I ) + TMP( 4-I ) = TMP( JPIV( 4-I ) ) + TMP( JPIV( 4-I ) ) = TEMP + END IF + 130 CONTINUE + X( 1, 1 ) = TMP( 1 ) + X( 2, 1 ) = TMP( 2 ) + X( 1, 2 ) = TMP( 3 ) + X( 2, 2 ) = TMP( 4 ) + XNORM = MAX( ABS( TMP( 1 ) )+ABS( TMP( 3 ) ), + $ ABS( TMP( 2 ) )+ABS( TMP( 4 ) ) ) + RETURN +* +* End of SLASY2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slatbs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slatbs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,723 @@ + SUBROUTINE SLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, + $ SCALE, CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, KD, LDAB, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), CNORM( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* SLATBS solves one of the triangular systems +* +* A *x = s*b or A'*x = s*b +* +* with scaling to prevent overflow, where A is an upper or lower +* triangular band matrix. 
Here A' denotes the transpose of A, x and b +* are n-element vectors, and s is a scaling factor, usually less than +* or equal to 1, chosen so that the components of x will be less than +* the overflow threshold. If the unscaled problem will not cause +* overflow, the Level 2 BLAS routine STBSV is called. If the matrix A +* is singular (A(j,j) = 0 for some j), then s is set to 0 and a +* non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A'* x = s*b (Transpose) +* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of subdiagonals or superdiagonals in the +* triangular matrix A. KD >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* X (input/output) REAL array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. 
+* +* SCALE (output) REAL +* The scaling factor s for the triangular system +* A * x = s*b or A'* x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) REAL array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, STBSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. 
Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine STBSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A'*x = b. The basic +* algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call STBSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST, JLEN, MAIND + REAL BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SASUM, SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SASUM, SDOT, SLAMCH +* .. +* .. External Subroutines .. 
+ EXTERNAL SAXPY, SSCAL, STBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( KD.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLATBS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + JLEN = MIN( KD, J-1 ) + CNORM( J ) = SASUM( JLEN, AB( KD+1-JLEN, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) THEN + CNORM( J ) = SASUM( JLEN, AB( 2, J ), 1 ) + ELSE + CNORM( J ) = ZERO + END IF + 20 CONTINUE + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine STBSV can be used. 
+* + J = ISAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = KD + 1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( AB( MAIND, J ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A' * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = KD + 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. 
+* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( AB( MAIND, J ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL STBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL SSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 100 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 95 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). 
+* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 95 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL SSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - +* x(j)* A(max(1,j-kd):j-1,j) +* + JLEN = MIN( KD, J-1 ) + CALL SAXPY( JLEN, -X( J )*TSCAL, + $ AB( KD+1-JLEN, J ), 1, X( J-JLEN ), 1 ) + I = ISAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + ELSE IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - +* x(j) * A(j+1:min(j+kd,n),j) +* + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ CALL SAXPY( JLEN, -X( J )*TSCAL, AB( 2, J ), 1, + $ X( J+1 ), 1 ) + I = J + ISAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + 100 CONTINUE +* + ELSE +* +* Solve A' * x = b +* + DO 140 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). 
+* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call SDOT to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + SUMJ = SDOT( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ SUMJ = SDOT( JLEN, AB( 2, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 110 I = 1, JLEN + SUMJ = SUMJ + ( AB( KD+I-JLEN, J )*USCAL )* + $ X( J-JLEN-1+I ) + 110 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 120 I = 1, JLEN + SUMJ = SUMJ + ( AB( I+1, J )*USCAL )*X( J+I ) + 120 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 135 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. 
+* + REC = ( TJJ*BIGNUM ) / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A'*x = 0. +* + DO 130 I = 1, N + X( I ) = ZERO + 130 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 135 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + 140 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of SLATBS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slatrd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slatrd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,258 @@ + SUBROUTINE SLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), E( * ), TAU( * ), W( LDW, * ) +* .. +* +* Purpose +* ======= +* +* SLATRD reduces NB rows and columns of a real symmetric matrix A to +* symmetric tridiagonal form by an orthogonal similarity +* transformation Q' * A * Q, and returns the matrices V and W which are +* needed to apply the transformation to the unreduced part of A. +* +* If UPLO = 'U', SLATRD reduces the last NB rows and columns of a +* matrix, of which the upper triangle is supplied; +* if UPLO = 'L', SLATRD reduces the first NB rows and columns of a +* matrix, of which the lower triangle is supplied. +* +* This is an auxiliary routine called by SSYTRD. 
+* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. +* +* NB (input) INTEGER +* The number of rows and columns to be reduced. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit: +* if UPLO = 'U', the last NB columns have been reduced to +* tridiagonal form, with the diagonal elements overwriting +* the diagonal elements of A; the elements above the diagonal +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors; +* if UPLO = 'L', the first NB columns have been reduced to +* tridiagonal form, with the diagonal elements overwriting +* the diagonal elements of A; the elements below the diagonal +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= (1,N). +* +* E (output) REAL array, dimension (N-1) +* If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal +* elements of the last NB columns of the reduced matrix; +* if UPLO = 'L', E(1:nb) contains the subdiagonal elements of +* the first NB columns of the reduced matrix. +* +* TAU (output) REAL array, dimension (N-1) +* The scalar factors of the elementary reflectors, stored in +* TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. +* See Further Details. 
+* +* W (output) REAL array, dimension (LDW,NB) +* The n-by-nb matrix W required to update the unreduced part +* of A. +* +* LDW (input) INTEGER +* The leading dimension of the array W. LDW >= max(1,N). +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n) H(n-1) . . . H(n-nb+1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), +* and tau in TAU(i-1). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(nb). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +* and tau in TAU(i). +* +* The elements of the vectors v together form the n-by-nb matrix V +* which is needed, with W, to apply the transformation to the unreduced +* part of the matrix, using a symmetric rank-2k update of the form: +* A := A - V*W' - W*V'. +* +* The contents of A on exit are illustrated by the following examples +* with n = 5 and nb = 2: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( a a a v4 v5 ) ( d ) +* ( a a v4 v5 ) ( 1 d ) +* ( a 1 v5 ) ( v1 1 a ) +* ( d 1 ) ( v1 v2 a a ) +* ( d ) ( v1 v2 a a a ) +* +* where d denotes a diagonal element of the reduced matrix, a denotes +* an element of the original matrix that is unchanged, and vi denotes +* an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, HALF + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, HALF = 0.5E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IW + REAL ALPHA +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SGEMV, SLARFG, SSCAL, SSYMV +* .. +* .. External Functions .. 
+ LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Reduce last NB columns of upper triangle +* + DO 10 I = N, N - NB + 1, -1 + IW = I - N + NB + IF( I.LT.N ) THEN +* +* Update A(1:i,i) +* + CALL SGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ), + $ LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 ) + CALL SGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ), + $ LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 ) + END IF + IF( I.GT.1 ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(1:i-2,i) +* + CALL SLARFG( I-1, A( I-1, I ), A( 1, I ), 1, TAU( I-1 ) ) + E( I-1 ) = A( I-1, I ) + A( I-1, I ) = ONE +* +* Compute W(1:i-1,i) +* + CALL SSYMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1, + $ ZERO, W( 1, IW ), 1 ) + IF( I.LT.N ) THEN + CALL SGEMV( 'Transpose', I-1, N-I, ONE, W( 1, IW+1 ), + $ LDW, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 ) + CALL SGEMV( 'No transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + CALL SGEMV( 'Transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 ) + CALL SGEMV( 'No transpose', I-1, N-I, -ONE, + $ W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + END IF + CALL SSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 ) + ALPHA = -HALF*TAU( I-1 )*SDOT( I-1, W( 1, IW ), 1, + $ A( 1, I ), 1 ) + CALL SAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 ) + END IF +* + 10 CONTINUE + ELSE +* +* Reduce first NB columns of lower triangle +* + DO 20 I = 1, NB +* +* Update A(i:n,i) +* + CALL SGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 ) + CALL SGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ), + $ LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 ) + IF( I.LT.N ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:n,i) +* + CALL SLARFG( N-I, A( I+1, I ), A( MIN( 
I+2, N ), I ), 1, + $ TAU( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Compute W(i+1:n,i) +* + CALL SSYMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', N-I, I-1, ONE, W( I+1, 1 ), LDW, + $ A( I+1, I ), 1, ZERO, W( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ), + $ LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL SSCAL( N-I, TAU( I ), W( I+1, I ), 1 ) + ALPHA = -HALF*TAU( I )*SDOT( N-I, W( I+1, I ), 1, + $ A( I+1, I ), 1 ) + CALL SAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 ) + END IF +* + 20 CONTINUE + END IF +* + RETURN +* +* End of SLATRD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slatrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slatrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,701 @@ + SUBROUTINE SLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, LDA, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL A( LDA, * ), CNORM( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* SLATRS solves one of the triangular systems +* +* A *x = s*b or A'*x = s*b +* +* with scaling to prevent overflow. Here A is an upper or lower +* triangular matrix, A' denotes the transpose of A, x and b are +* n-element vectors, and s is a scaling factor, usually less than +* or equal to 1, chosen so that the components of x will be less than +* the overflow threshold. If the unscaled problem will not cause +* overflow, the Level 2 BLAS routine STRSV is called. 
If the matrix A +* is singular (A(j,j) = 0 for some j), then s is set to 0 and a +* non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A'* x = s*b (Transpose) +* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading n by n +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading n by n lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max (1,N). +* +* X (input/output) REAL array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) REAL +* The scaling factor s for the triangular system +* A * x = s*b or A'* x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. 
+* +* CNORM (input or output) REAL array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, STRSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine STRSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). 
+* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A'*x = b. The basic +* algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call STRSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST + REAL BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SASUM, SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SASUM, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSCAL, STRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. 
.NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLATRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + CNORM( J ) = SASUM( J-1, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N - 1 + CNORM( J ) = SASUM( N-J, A( J+1, J ), 1 ) + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine STRSV can be used. +* + J = ISAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. 
+* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( A( J, J ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A' * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( A( J, J ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. 
+* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL STRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL SSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 100 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 95 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. 
+* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 95 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL SSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL SAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X, + $ 1 ) + I = ISAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL SAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1, + $ X( J+1 ), 1 ) + I = J + ISAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + END IF + 100 CONTINUE +* + ELSE +* +* Solve A' * x = b +* + DO 140 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call SDOT to perform the dot product. 
+* + IF( UPPER ) THEN + SUMJ = SDOT( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + SUMJ = SDOT( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 110 I = 1, J - 1 + SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I ) + 110 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 120 I = J + 1, N + SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I ) + 120 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 135 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A'*x = 0. +* + DO 130 I = 1, N + X( I ) = ZERO + 130 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 135 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + 140 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. 
+* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of SLATRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slatrz.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slatrz.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,127 @@ + SUBROUTINE SLATRZ( M, N, L, A, LDA, TAU, WORK ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER L, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLATRZ factors the M-by-(M+L) real upper trapezoidal matrix +* [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z, by means +* of orthogonal transformations. Z is an (M+L)-by-(M+L) orthogonal +* matrix and, R and A1 are M-by-M upper triangular matrices. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing the +* meaningful part of the Householder vectors. N-M >= L >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements N-L+1 to +* N of the first M rows of A, with the array TAU, represent the +* orthogonal matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace) REAL array, dimension (M) +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. 
of Tenn., Knoxville, USA +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), which is used to introduce zeros into +* the ( m - k + 1 )th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an l element vector. tau and z( k ) +* are chosen to annihilate the elements of the kth row of A2. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A2, such that the elements of z( k ) are +* in a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A1. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL SLARFG, SLARZ +* .. +* .. Executable Statements .. +* +* Test the input arguments +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + DO 20 I = M, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* [ A(i,i) A(i,n-l+1:n) ] +* + CALL SLARFG( L+1, A( I, I ), A( I, N-L+1 ), LDA, TAU( I ) ) +* +* Apply H(i) to A(1:i-1,i:n) from the right +* + CALL SLARZ( 'Right', I-1, N-I+1, L, A( I, N-L+1 ), LDA, + $ TAU( I ), A( 1, I ), LDA, WORK ) +* + 20 CONTINUE +* + RETURN +* +* End of SLATRZ +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slauu2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slauu2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,135 @@ + SUBROUTINE SLAUU2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. 
+* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SLAUU2 computes the product U * U' or L' * L, where the triangular +* factor U or L is stored in the upper or lower triangular part of +* the array A. +* +* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +* overwriting the factor U in A. +* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +* overwriting the factor L in A. +* +* This is the unblocked form of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the triangular factor stored in the array A +* is upper or lower triangular: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the triangular factor U or L. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the triangular factor U or L. +* On exit, if UPLO = 'U', the upper triangle of A is +* overwritten with the upper triangle of the product U * U'; +* if UPLO = 'L', the lower triangle of A is overwritten with +* the lower triangle of the product L' * L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. 
.NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAUU2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the product U * U'. +* + DO 10 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = SDOT( N-I+1, A( I, I ), LDA, A( I, I ), LDA ) + CALL SGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, AII, A( 1, I ), 1 ) + ELSE + CALL SSCAL( I, AII, A( 1, I ), 1 ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the product L' * L. +* + DO 20 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = SDOT( N-I+1, A( I, I ), 1, A( I, I ), 1 ) + CALL SGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, AII, A( I, 1 ), LDA ) + ELSE + CALL SSCAL( I, AII, A( I, 1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of SLAUU2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slauum.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slauum.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,155 @@ + SUBROUTINE SLAUUM( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SLAUUM computes the product U * U' or L' * L, where the triangular +* factor U or L is stored in the upper or lower triangular part of +* the array A. +* +* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +* overwriting the factor U in A. +* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +* overwriting the factor L in A. +* +* This is the blocked form of the algorithm, calling Level 3 BLAS. 
+* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the triangular factor stored in the array A +* is upper or lower triangular: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the triangular factor U or L. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the triangular factor U or L. +* On exit, if UPLO = 'U', the upper triangle of A is +* overwritten with the upper triangle of the product U * U'; +* if UPLO = 'L', the lower triangle of A is overwritten with +* the lower triangle of the product L' * L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLAUU2, SSYRK, STRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAUUM', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'SLAUUM', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL SLAUU2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute the product U * U'. 
+* + DO 10 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Non-unit', + $ I-1, IB, ONE, A( I, I ), LDA, A( 1, I ), + $ LDA ) + CALL SLAUU2( 'Upper', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL SGEMM( 'No transpose', 'Transpose', I-1, IB, + $ N-I-IB+1, ONE, A( 1, I+IB ), LDA, + $ A( I, I+IB ), LDA, ONE, A( 1, I ), LDA ) + CALL SSYRK( 'Upper', 'No transpose', IB, N-I-IB+1, + $ ONE, A( I, I+IB ), LDA, ONE, A( I, I ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the product L' * L. +* + DO 20 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL STRMM( 'Left', 'Lower', 'Transpose', 'Non-unit', IB, + $ I-1, ONE, A( I, I ), LDA, A( I, 1 ), LDA ) + CALL SLAUU2( 'Lower', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL SGEMM( 'Transpose', 'No transpose', IB, I-1, + $ N-I-IB+1, ONE, A( I+IB, I ), LDA, + $ A( I+IB, 1 ), LDA, ONE, A( I, 1 ), LDA ) + CALL SSYRK( 'Lower', 'Transpose', IB, N-I-IB+1, ONE, + $ A( I+IB, I ), LDA, ONE, A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of SLAUUM +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slazq3.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slazq3.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,302 @@ + SUBROUTINE SLAZQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, + $ ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1, + $ DN2, TAU ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER I0, ITER, N0, NDIV, NFAIL, PP, TTYPE + REAL DESIG, DMIN, DMIN1, DMIN2, DN, DN1, DN2, QMAX, + $ SIGMA, TAU +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* Purpose +* ======= +* +* SLAZQ3 checks for deflation, computes a shift (TAU) and calls dqds. +* In case of failure it changes shifts, and tries again until output +* is positive. 
+* +* Arguments +* ========= +* +* I0 (input) INTEGER +* First index. +* +* N0 (input) INTEGER +* Last index. +* +* Z (input) REAL array, dimension ( 4*N ) +* Z holds the qd array. +* +* PP (input) INTEGER +* PP=0 for ping, PP=1 for pong. +* +* DMIN (output) REAL +* Minimum value of d. +* +* SIGMA (output) REAL +* Sum of shifts used in current segment. +* +* DESIG (input/output) REAL +* Lower order part of SIGMA +* +* QMAX (input) REAL +* Maximum value of q. +* +* NFAIL (output) INTEGER +* Number of times shift was too big. +* +* ITER (output) INTEGER +* Number of iterations. +* +* NDIV (output) INTEGER +* Number of divisions. +* +* IEEE (input) LOGICAL +* Flag for IEEE or non IEEE arithmetic (passed to SLASQ5). +* +* TTYPE (input/output) INTEGER +* Shift type. TTYPE is passed as an argument in order to save +* its value between calls to SLAZQ3 +* +* DMIN1 (input/output) REAL +* DMIN2 (input/output) REAL +* DN (input/output) REAL +* DN1 (input/output) REAL +* DN2 (input/output) REAL +* TAU (input/output) REAL +* These are passed as arguments in order to save their values +* between calls to SLAZQ3 +* +* This is a thread safe version of SLASQ3, which passes TTYPE, DMIN1, +* DMIN2, DN, DN1. DN2 and TAU through the argument list in place of +* declaring them in a SAVE statment. +* +* ===================================================================== +* +* .. Parameters .. + REAL CBIAS + PARAMETER ( CBIAS = 1.50E0 ) + REAL ZERO, QURTR, HALF, ONE, TWO, HUNDRD + PARAMETER ( ZERO = 0.0E0, QURTR = 0.250E0, HALF = 0.5E0, + $ ONE = 1.0E0, TWO = 2.0E0, HUNDRD = 100.0E0 ) +* .. +* .. Local Scalars .. + INTEGER IPN4, J4, N0IN, NN + REAL EPS, G, S, SAFMIN, T, TEMP, TOL, TOL2 +* .. +* .. External Subroutines .. + EXTERNAL SLASQ5, SLASQ6, SLAZQ4 +* .. +* .. External Function .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Executable Statements .. 
+* + N0IN = N0 + EPS = SLAMCH( 'Precision' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + TOL = EPS*HUNDRD + TOL2 = TOL**2 + G = ZERO +* +* Check for deflation. +* + 10 CONTINUE +* + IF( N0.LT.I0 ) + $ RETURN + IF( N0.EQ.I0 ) + $ GO TO 20 + NN = 4*N0 + PP + IF( N0.EQ.( I0+1 ) ) + $ GO TO 40 +* +* Check whether E(N0-1) is negligible, 1 eigenvalue. +* + IF( Z( NN-5 ).GT.TOL2*( SIGMA+Z( NN-3 ) ) .AND. + $ Z( NN-2*PP-4 ).GT.TOL2*Z( NN-7 ) ) + $ GO TO 30 +* + 20 CONTINUE +* + Z( 4*N0-3 ) = Z( 4*N0+PP-3 ) + SIGMA + N0 = N0 - 1 + GO TO 10 +* +* Check whether E(N0-2) is negligible, 2 eigenvalues. +* + 30 CONTINUE +* + IF( Z( NN-9 ).GT.TOL2*SIGMA .AND. + $ Z( NN-2*PP-8 ).GT.TOL2*Z( NN-11 ) ) + $ GO TO 50 +* + 40 CONTINUE +* + IF( Z( NN-3 ).GT.Z( NN-7 ) ) THEN + S = Z( NN-3 ) + Z( NN-3 ) = Z( NN-7 ) + Z( NN-7 ) = S + END IF + IF( Z( NN-5 ).GT.Z( NN-3 )*TOL2 ) THEN + T = HALF*( ( Z( NN-7 )-Z( NN-3 ) )+Z( NN-5 ) ) + S = Z( NN-3 )*( Z( NN-5 ) / T ) + IF( S.LE.T ) THEN + S = Z( NN-3 )*( Z( NN-5 ) / + $ ( T*( ONE+SQRT( ONE+S / T ) ) ) ) + ELSE + S = Z( NN-3 )*( Z( NN-5 ) / ( T+SQRT( T )*SQRT( T+S ) ) ) + END IF + T = Z( NN-7 ) + ( S+Z( NN-5 ) ) + Z( NN-3 ) = Z( NN-3 )*( Z( NN-7 ) / T ) + Z( NN-7 ) = T + END IF + Z( 4*N0-7 ) = Z( NN-7 ) + SIGMA + Z( 4*N0-3 ) = Z( NN-3 ) + SIGMA + N0 = N0 - 2 + GO TO 10 +* + 50 CONTINUE +* +* Reverse the qd-array, if warranted. +* + IF( DMIN.LE.ZERO .OR. 
N0.LT.N0IN ) THEN + IF( CBIAS*Z( 4*I0+PP-3 ).LT.Z( 4*N0+PP-3 ) ) THEN + IPN4 = 4*( I0+N0 ) + DO 60 J4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( J4-3 ) + Z( J4-3 ) = Z( IPN4-J4-3 ) + Z( IPN4-J4-3 ) = TEMP + TEMP = Z( J4-2 ) + Z( J4-2 ) = Z( IPN4-J4-2 ) + Z( IPN4-J4-2 ) = TEMP + TEMP = Z( J4-1 ) + Z( J4-1 ) = Z( IPN4-J4-5 ) + Z( IPN4-J4-5 ) = TEMP + TEMP = Z( J4 ) + Z( J4 ) = Z( IPN4-J4-4 ) + Z( IPN4-J4-4 ) = TEMP + 60 CONTINUE + IF( N0-I0.LE.4 ) THEN + Z( 4*N0+PP-1 ) = Z( 4*I0+PP-1 ) + Z( 4*N0-PP ) = Z( 4*I0-PP ) + END IF + DMIN2 = MIN( DMIN2, Z( 4*N0+PP-1 ) ) + Z( 4*N0+PP-1 ) = MIN( Z( 4*N0+PP-1 ), Z( 4*I0+PP-1 ), + $ Z( 4*I0+PP+3 ) ) + Z( 4*N0-PP ) = MIN( Z( 4*N0-PP ), Z( 4*I0-PP ), + $ Z( 4*I0-PP+4 ) ) + QMAX = MAX( QMAX, Z( 4*I0+PP-3 ), Z( 4*I0+PP+1 ) ) + DMIN = -ZERO + END IF + END IF +* + IF( DMIN.LT.ZERO .OR. SAFMIN*QMAX.LT.MIN( Z( 4*N0+PP-1 ), + $ Z( 4*N0+PP-9 ), DMIN2+Z( 4*N0-PP ) ) ) THEN +* +* Choose a shift. +* + CALL SLAZQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, DN1, + $ DN2, TAU, TTYPE, G ) +* +* Call dqds until DMIN > 0. +* + 80 CONTINUE +* + CALL SLASQ5( I0, N0, Z, PP, TAU, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, IEEE ) +* + NDIV = NDIV + ( N0-I0+2 ) + ITER = ITER + 1 +* +* Check status. +* + IF( DMIN.GE.ZERO .AND. DMIN1.GT.ZERO ) THEN +* +* Success. +* + GO TO 100 +* + ELSE IF( DMIN.LT.ZERO .AND. DMIN1.GT.ZERO .AND. + $ Z( 4*( N0-1 )-PP ).LT.TOL*( SIGMA+DN1 ) .AND. + $ ABS( DN ).LT.TOL*SIGMA ) THEN +* +* Convergence hidden by negative DN. +* + Z( 4*( N0-1 )-PP+2 ) = ZERO + DMIN = ZERO + GO TO 100 + ELSE IF( DMIN.LT.ZERO ) THEN +* +* TAU too big. Select new TAU and try again. +* + NFAIL = NFAIL + 1 + IF( TTYPE.LT.-22 ) THEN +* +* Failed twice. Play it safe. +* + TAU = ZERO + ELSE IF( DMIN1.GT.ZERO ) THEN +* +* Late failure. Gives excellent shift. +* + TAU = ( TAU+DMIN )*( ONE-TWO*EPS ) + TTYPE = TTYPE - 11 + ELSE +* +* Early failure. Divide by 4. +* + TAU = QURTR*TAU + TTYPE = TTYPE - 12 + END IF + GO TO 80 + ELSE IF( DMIN.NE.DMIN ) THEN +* +* NaN. 
+* + TAU = ZERO + GO TO 80 + ELSE +* +* Possible underflow. Play it safe. +* + GO TO 90 + END IF + END IF +* +* Risk of underflow. +* + 90 CONTINUE + CALL SLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, DN1, DN2 ) + NDIV = NDIV + ( N0-I0+2 ) + ITER = ITER + 1 + TAU = ZERO +* + 100 CONTINUE + IF( TAU.LT.SIGMA ) THEN + DESIG = DESIG + TAU + T = SIGMA + DESIG + DESIG = DESIG - ( T-SIGMA ) + ELSE + T = SIGMA + TAU + DESIG = SIGMA - ( T-TAU ) + DESIG + END IF + SIGMA = T +* + RETURN +* +* End of SLAZQ3 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/slazq4.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/slazq4.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,330 @@ + SUBROUTINE SLAZQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, TAU, TTYPE, G ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER I0, N0, N0IN, PP, TTYPE + REAL DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, TAU +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* Purpose +* ======= +* +* SLAZQ4 computes an approximation TAU to the smallest eigenvalue +* using values of d from the previous transform. +* +* I0 (input) INTEGER +* First index. +* +* N0 (input) INTEGER +* Last index. +* +* Z (input) REAL array, dimension ( 4*N ) +* Z holds the qd array. +* +* PP (input) INTEGER +* PP=0 for ping, PP=1 for pong. +* +* N0IN (input) INTEGER +* The value of N0 at start of EIGTEST. +* +* DMIN (input) REAL +* Minimum value of d. +* +* DMIN1 (input) REAL +* Minimum value of d, excluding D( N0 ). +* +* DMIN2 (input) REAL +* Minimum value of d, excluding D( N0 ) and D( N0-1 ). +* +* DN (input) REAL +* d(N) +* +* DN1 (input) REAL +* d(N-1) +* +* DN2 (input) REAL +* d(N-2) +* +* TAU (output) REAL +* This is the shift. +* +* TTYPE (output) INTEGER +* Shift type. 
+* +* G (input/output) REAL +* G is passed as an argument in order to save its value between +* calls to SLAZQ4 +* +* Further Details +* =============== +* CNST1 = 9/16 +* +* This is a thread safe version of SLASQ4, which passes G through the +* argument list in place of declaring G in a SAVE statment. +* +* ===================================================================== +* +* .. Parameters .. + REAL CNST1, CNST2, CNST3 + PARAMETER ( CNST1 = 0.5630E0, CNST2 = 1.010E0, + $ CNST3 = 1.050E0 ) + REAL QURTR, THIRD, HALF, ZERO, ONE, TWO, HUNDRD + PARAMETER ( QURTR = 0.250E0, THIRD = 0.3330E0, + $ HALF = 0.50E0, ZERO = 0.0E0, ONE = 1.0E0, + $ TWO = 2.0E0, HUNDRD = 100.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I4, NN, NP + REAL A2, B1, B2, GAM, GAP1, GAP2, S +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* A negative DMIN forces the shift to take that absolute value +* TTYPE records the type of shift. +* + IF( DMIN.LE.ZERO ) THEN + TAU = -DMIN + TTYPE = -1 + RETURN + END IF +* + NN = 4*N0 + PP + IF( N0IN.EQ.N0 ) THEN +* +* No eigenvalues deflated. +* + IF( DMIN.EQ.DN .OR. DMIN.EQ.DN1 ) THEN +* + B1 = SQRT( Z( NN-3 ) )*SQRT( Z( NN-5 ) ) + B2 = SQRT( Z( NN-7 ) )*SQRT( Z( NN-9 ) ) + A2 = Z( NN-7 ) + Z( NN-5 ) +* +* Cases 2 and 3. +* + IF( DMIN.EQ.DN .AND. DMIN1.EQ.DN1 ) THEN + GAP2 = DMIN2 - A2 - DMIN2*QURTR + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2 ) THEN + GAP1 = A2 - DN - ( B2 / GAP2 )*B2 + ELSE + GAP1 = A2 - DN - ( B1+B2 ) + END IF + IF( GAP1.GT.ZERO .AND. GAP1.GT.B1 ) THEN + S = MAX( DN-( B1 / GAP1 )*B1, HALF*DMIN ) + TTYPE = -2 + ELSE + S = ZERO + IF( DN.GT.B1 ) + $ S = DN - B1 + IF( A2.GT.( B1+B2 ) ) + $ S = MIN( S, A2-( B1+B2 ) ) + S = MAX( S, THIRD*DMIN ) + TTYPE = -3 + END IF + ELSE +* +* Case 4. +* + TTYPE = -4 + S = QURTR*DMIN + IF( DMIN.EQ.DN ) THEN + GAM = DN + A2 = ZERO + IF( Z( NN-5 ) .GT. 
Z( NN-7 ) ) + $ RETURN + B2 = Z( NN-5 ) / Z( NN-7 ) + NP = NN - 9 + ELSE + NP = NN - 2*PP + B2 = Z( NP-2 ) + GAM = DN1 + IF( Z( NP-4 ) .GT. Z( NP-2 ) ) + $ RETURN + A2 = Z( NP-4 ) / Z( NP-2 ) + IF( Z( NN-9 ) .GT. Z( NN-11 ) ) + $ RETURN + B2 = Z( NN-9 ) / Z( NN-11 ) + NP = NN - 13 + END IF +* +* Approximate contribution to norm squared from I < NN-1. +* + A2 = A2 + B2 + DO 10 I4 = NP, 4*I0 - 1 + PP, -4 + IF( B2.EQ.ZERO ) + $ GO TO 20 + B1 = B2 + IF( Z( I4 ) .GT. Z( I4-2 ) ) + $ RETURN + B2 = B2*( Z( I4 ) / Z( I4-2 ) ) + A2 = A2 + B2 + IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) + $ GO TO 20 + 10 CONTINUE + 20 CONTINUE + A2 = CNST3*A2 +* +* Rayleigh quotient residual bound. +* + IF( A2.LT.CNST1 ) + $ S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 ) + END IF + ELSE IF( DMIN.EQ.DN2 ) THEN +* +* Case 5. +* + TTYPE = -5 + S = QURTR*DMIN +* +* Compute contribution to norm squared from I > NN-2. +* + NP = NN - 2*PP + B1 = Z( NP-2 ) + B2 = Z( NP-6 ) + GAM = DN2 + IF( Z( NP-8 ).GT.B2 .OR. Z( NP-4 ).GT.B1 ) + $ RETURN + A2 = ( Z( NP-8 ) / B2 )*( ONE+Z( NP-4 ) / B1 ) +* +* Approximate contribution to norm squared from I < NN-2. +* + IF( N0-I0.GT.2 ) THEN + B2 = Z( NN-13 ) / Z( NN-15 ) + A2 = A2 + B2 + DO 30 I4 = NN - 17, 4*I0 - 1 + PP, -4 + IF( B2.EQ.ZERO ) + $ GO TO 40 + B1 = B2 + IF( Z( I4 ) .GT. Z( I4-2 ) ) + $ RETURN + B2 = B2*( Z( I4 ) / Z( I4-2 ) ) + A2 = A2 + B2 + IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) + $ GO TO 40 + 30 CONTINUE + 40 CONTINUE + A2 = CNST3*A2 + END IF +* + IF( A2.LT.CNST1 ) + $ S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 ) + ELSE +* +* Case 6, no information to guide us. +* + IF( TTYPE.EQ.-6 ) THEN + G = G + THIRD*( ONE-G ) + ELSE IF( TTYPE.EQ.-18 ) THEN + G = QURTR*THIRD + ELSE + G = QURTR + END IF + S = G*DMIN + TTYPE = -6 + END IF +* + ELSE IF( N0IN.EQ.( N0+1 ) ) THEN +* +* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. +* + IF( DMIN1.EQ.DN1 .AND. DMIN2.EQ.DN2 ) THEN +* +* Cases 7 and 8. 
+* + TTYPE = -7 + S = THIRD*DMIN1 + IF( Z( NN-5 ).GT.Z( NN-7 ) ) + $ RETURN + B1 = Z( NN-5 ) / Z( NN-7 ) + B2 = B1 + IF( B2.EQ.ZERO ) + $ GO TO 60 + DO 50 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4 + A2 = B1 + IF( Z( I4 ).GT.Z( I4-2 ) ) + $ RETURN + B1 = B1*( Z( I4 ) / Z( I4-2 ) ) + B2 = B2 + B1 + IF( HUNDRD*MAX( B1, A2 ).LT.B2 ) + $ GO TO 60 + 50 CONTINUE + 60 CONTINUE + B2 = SQRT( CNST3*B2 ) + A2 = DMIN1 / ( ONE+B2**2 ) + GAP2 = HALF*DMIN2 - A2 + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN + S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) ) + ELSE + S = MAX( S, A2*( ONE-CNST2*B2 ) ) + TTYPE = -8 + END IF + ELSE +* +* Case 9. +* + S = QURTR*DMIN1 + IF( DMIN1.EQ.DN1 ) + $ S = HALF*DMIN1 + TTYPE = -9 + END IF +* + ELSE IF( N0IN.EQ.( N0+2 ) ) THEN +* +* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. +* +* Cases 10 and 11. +* + IF( DMIN2.EQ.DN2 .AND. TWO*Z( NN-5 ).LT.Z( NN-7 ) ) THEN + TTYPE = -10 + S = THIRD*DMIN2 + IF( Z( NN-5 ).GT.Z( NN-7 ) ) + $ RETURN + B1 = Z( NN-5 ) / Z( NN-7 ) + B2 = B1 + IF( B2.EQ.ZERO ) + $ GO TO 80 + DO 70 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4 + IF( Z( I4 ).GT.Z( I4-2 ) ) + $ RETURN + B1 = B1*( Z( I4 ) / Z( I4-2 ) ) + B2 = B2 + B1 + IF( HUNDRD*B1.LT.B2 ) + $ GO TO 80 + 70 CONTINUE + 80 CONTINUE + B2 = SQRT( CNST3*B2 ) + A2 = DMIN2 / ( ONE+B2**2 ) + GAP2 = Z( NN-7 ) + Z( NN-9 ) - + $ SQRT( Z( NN-11 ) )*SQRT( Z( NN-9 ) ) - A2 + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN + S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) ) + ELSE + S = MAX( S, A2*( ONE-CNST2*B2 ) ) + END IF + ELSE + S = QURTR*DMIN2 + TTYPE = -11 + END IF + ELSE IF( N0IN.GT.( N0+2 ) ) THEN +* +* Case 12, more than two eigenvalues deflated. No information. 
+* + S = ZERO + TTYPE = -12 + END IF +* + TAU = S + RETURN +* +* End of SLAZQ4 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sorg2l.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sorg2l.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,127 @@ + SUBROUTINE SORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORG2L generates an m by n real matrix Q with orthonormal columns, +* which is defined as the last n columns of a product of k elementary +* reflectors of order m +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by SGEQLF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the (n-k+i)-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by SGEQLF in the last k columns of its array +* argument A. +* On exit, the m by n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEQLF. +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. 
Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORG2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns 1:n-k to columns of the unit matrix +* + DO 20 J = 1, N - K + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( M-N+J, J ) = ONE + 20 CONTINUE +* + DO 40 I = 1, K + II = N - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left +* + A( M-N+II, II ) = ONE + CALL SLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, + $ LDA, WORK ) + CALL SSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) + A( M-N+II, II ) = ONE - TAU( I ) +* +* Set A(m-k+i+1:m,n-k+i) to zero +* + DO 30 L = M - N + II + 1, M + A( L, II ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of SORG2L +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sorg2r.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sorg2r.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,129 @@ + SUBROUTINE SORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORG2R generates an m by n real matrix Q with orthonormal columns, +* which is defined as the first n columns of a product of k elementary +* reflectors of order m +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by SGEQRF. 
+* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the i-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by SGEQRF in the first k columns of its array +* argument A. +* On exit, the m-by-n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEQRF. +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. 
K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORG2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns k+1:n to columns of the unit matrix +* + DO 20 J = K + 1, N + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( J, J ) = ONE + 20 CONTINUE +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the left +* + IF( I.LT.N ) THEN + A( I, I ) = ONE + CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + END IF + IF( I.LT.M ) + $ CALL SSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) + A( I, I ) = ONE - TAU( I ) +* +* Set A(1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( L, I ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of SORG2R +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sorgbr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sorgbr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,244 @@ + SUBROUTINE SORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGBR generates one of the real orthogonal matrices Q or P**T +* determined by SGEBRD when reducing a real matrix A to bidiagonal +* form: A = Q * B * P**T. Q and P**T are defined as products of +* elementary reflectors H(i) or G(i) respectively. +* +* If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q +* is of order M: +* if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n +* columns of Q, where m >= n >= k; +* if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an +* M-by-M matrix. 
+* +* If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T +* is of order N: +* if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m +* rows of P**T, where n >= m >= k; +* if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as +* an N-by-N matrix. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* Specifies whether the matrix Q or the matrix P**T is +* required, as defined in the transformation applied by SGEBRD: +* = 'Q': generate Q; +* = 'P': generate P**T. +* +* M (input) INTEGER +* The number of rows of the matrix Q or P**T to be returned. +* M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q or P**T to be returned. +* N >= 0. +* If VECT = 'Q', M >= N >= min(M,K); +* if VECT = 'P', N >= M >= min(N,K). +* +* K (input) INTEGER +* If VECT = 'Q', the number of columns in the original M-by-K +* matrix reduced by SGEBRD. +* If VECT = 'P', the number of rows in the original K-by-N +* matrix reduced by SGEBRD. +* K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by SGEBRD. +* On exit, the M-by-N matrix Q or P**T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension +* (min(M,K)) if VECT = 'Q' +* (min(N,K)) if VECT = 'P' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i) or G(i), which determines Q or P**T, as +* returned by SGEBRD in its array argument TAUQ or TAUP. +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,min(M,N)). +* For optimum performance LWORK >= min(M,N)*NB, where NB +* is the optimal blocksize. 
+* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ + INTEGER I, IINFO, J, LWKOPT, MN, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORGLQ, SORGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTQ = LSAME( VECT, 'Q' ) + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M, + $ K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT. + $ MIN( N, K ) ) ) ) THEN + INFO = -3 + ELSE IF( K.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( WANTQ ) THEN + NB = ILAENV( 1, 'SORGQR', ' ', M, N, K, -1 ) + ELSE + NB = ILAENV( 1, 'SORGLQ', ' ', M, N, K, -1 ) + END IF + LWKOPT = MAX( 1, MN )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( WANTQ ) THEN +* +* Form Q, determined by a call to SGEBRD to reduce an m-by-k +* matrix +* + IF( M.GE.K ) THEN +* +* If m >= k, assume m >= n >= k +* + CALL SORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If m < k, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q +* to those of the unit matrix +* + DO 20 J = M, 2, -1 + A( 1, J ) = ZERO + DO 10 I = J + 1, M + A( I, J ) = A( I, J-1 ) + 10 CONTINUE + 20 CONTINUE + A( 1, 1 ) = ONE + DO 30 I = 2, M + A( I, 1 ) = ZERO + 30 CONTINUE + IF( M.GT.1 ) THEN +* +* Form Q(2:m,2:m) +* + CALL SORGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + ELSE +* +* Form P', determined by a call to SGEBRD to reduce a k-by-n +* matrix +* + IF( K.LT.N ) THEN +* +* If k < n, assume k <= m <= n +* + CALL SORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If k >= n, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* row downward, and set the first row and column of P' to +* those of the unit matrix +* + A( 1, 1 ) = ONE + DO 40 I = 2, N + A( I, 1 ) = ZERO + 40 CONTINUE + DO 60 J = 2, N + DO 50 I = J - 1, 2, -1 + A( I, J ) = A( I-1, J ) + 50 CONTINUE + A( 1, J ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Form P'(2:n,2:n) +* + CALL SORGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORGBR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sorghr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sorghr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,164 @@ + SUBROUTINE SORGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. 
+ INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGHR generates a real orthogonal matrix Q which is defined as the +* product of IHI-ILO elementary reflectors of order N, as returned by +* SGEHRD: +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* ILO and IHI must have the same values as in the previous call +* of SGEHRD. Q is equal to the unit matrix except in the +* submatrix Q(ilo+1:ihi,ilo+1:ihi). +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by SGEHRD. +* On exit, the N-by-N orthogonal matrix Q. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (input) REAL array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEHRD. +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= IHI-ILO. +* For optimum performance LWORK >= (IHI-ILO)*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. 
+ LOGICAL LQUERY + INTEGER I, IINFO, J, LWKOPT, NB, NH +* .. +* .. External Subroutines .. + EXTERNAL SORGQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, NH ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'SORGQR', ' ', NH, NH, NH, -1 ) + LWKOPT = MAX( 1, NH )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first ilo and the last n-ihi +* rows and columns to those of the unit matrix +* + DO 40 J = IHI, ILO + 1, -1 + DO 10 I = 1, J - 1 + A( I, J ) = ZERO + 10 CONTINUE + DO 20 I = J + 1, IHI + A( I, J ) = A( I, J-1 ) + 20 CONTINUE + DO 30 I = IHI + 1, N + A( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + DO 60 J = 1, ILO + DO 50 I = 1, N + A( I, J ) = ZERO + 50 CONTINUE + A( J, J ) = ONE + 60 CONTINUE + DO 80 J = IHI + 1, N + DO 70 I = 1, N + A( I, J ) = ZERO + 70 CONTINUE + A( J, J ) = ONE + 80 CONTINUE +* + IF( NH.GT.0 ) THEN +* +* Generate Q(ilo+1:ihi,ilo+1:ihi) +* + CALL SORGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ), + $ WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORGHR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sorgl2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sorgl2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,133 
@@ + SUBROUTINE SORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGL2 generates an m by n real matrix Q with orthonormal rows, +* which is defined as the first m rows of a product of k elementary +* reflectors of order n +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by SGELQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the i-th row must contain the vector which defines +* the elementary reflector H(i), for i = 1,2,...,k, as returned +* by SGELQF in the first k rows of its array argument A. +* On exit, the m-by-n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGELQF. +* +* WORK (workspace) REAL array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGL2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows k+1:m to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = K + 1, M + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.K .AND. J.LE.M ) + $ A( J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the right +* + IF( I.LT.N ) THEN + IF( I.LT.M ) THEN + A( I, I ) = ONE + CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAU( I ), A( I+1, I ), LDA, WORK ) + END IF + CALL SSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) + END IF + A( I, I ) = ONE - TAU( I ) +* +* Set A(i,1:i-1) to zero +* + DO 30 L = 1, I - 1 + A( I, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of SORGL2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sorglq.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sorglq.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,215 @@ + SUBROUTINE SORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGLQ generates an M-by-N real matrix Q with orthonormal rows, +* which is defined as the first M rows of a product of K elementary +* reflectors of order N +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by SGELQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. 
+* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the i-th row must contain the vector which defines +* the elementary reflector H(i), for i = 1,2,...,k, as returned +* by SGELQF in the first k rows of its array argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGELQF. +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORGL2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. 
+* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SORGLQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SORGLQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORGLQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(kk+1:m,1:kk) to zero. +* + DO 20 J = 1, KK + DO 10 I = KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.M ) + $ CALL SORGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . 
H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(i+ib:m,i:n) from the right +* + CALL SLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', + $ M-I-IB+1, N-I+1, IB, A( I, I ), LDA, WORK, + $ LDWORK, A( I+IB, I ), LDA, WORK( IB+1 ), + $ LDWORK ) + END IF +* +* Apply H' to columns i:n of current block +* + CALL SORGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set columns 1:i-1 of current block to zero +* + DO 40 J = 1, I - 1 + DO 30 L = I, I + IB - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of SORGLQ +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sorgql.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sorgql.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,222 @@ + SUBROUTINE SORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGQL generates an M-by-N real matrix Q with orthonormal columns, +* which is defined as the last N columns of a product of K elementary +* reflectors of order M +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by SGEQLF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the (n-k+i)-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by SGEQLF in the last k columns of its array +* argument A. 
+* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEQLF. +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT, + $ NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORG2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'SORGQL', ' ', M, N, K, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, N ) .AND. 
.NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SORGQL', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORGQL', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk columns are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(m-kk+1:m,1:n-kk) to zero. +* + DO 20 J = 1, N - KK + DO 10 I = M - KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL SORG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . 
H(i+1) H(i) +* + CALL SLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL SLARFB( 'Left', 'No transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows 1:m-k+i+ib-1 of current block +* + CALL SORG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA, + $ TAU( I ), WORK, IINFO ) +* +* Set rows m-k+i+ib:m of current block to zero +* + DO 40 J = N - K + I, N - K + I + IB - 1 + DO 30 L = M - K + I + IB, M + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of SORGQL +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sorgqr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sorgqr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,216 @@ + SUBROUTINE SORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGQR generates an M-by-N real matrix Q with orthonormal columns, +* which is defined as the first N columns of a product of K elementary +* reflectors of order M +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by SGEQRF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. 
+* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the i-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by SGEQRF in the first k columns of its array +* argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEQRF. +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SORGQR', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. 
K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SORGQR', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORGQR', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(1:kk,kk+1:n) to zero. +* + DO 20 J = KK + 1, N + DO 10 I = 1, KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.N ) + $ CALL SORG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . 
H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i:m,i+ib:n) from the left +* + CALL SLARFB( 'Left', 'No transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows i:m of current block +* + CALL SORG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set rows 1:i-1 of current block to zero +* + DO 40 J = I, I + IB - 1 + DO 30 L = 1, I - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of SORGQR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sorgtr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sorgtr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,183 @@ + SUBROUTINE SORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGTR generates a real orthogonal matrix Q which is defined as the +* product of n-1 elementary reflectors of order N, as returned by +* SSYTRD: +* +* if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A contains elementary reflectors +* from SSYTRD; +* = 'L': Lower triangle of A contains elementary reflectors +* from SSYTRD. +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by SSYTRD. +* On exit, the N-by-N orthogonal matrix Q. 
+* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (input) REAL array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SSYTRD. +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N-1). +* For optimum performance LWORK >= (N-1)*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, J, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORGQL, SORGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. 
.NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF ( UPPER ) THEN + NB = ILAENV( 1, 'SORGQL', ' ', N-1, N-1, N-1, -1 ) + ELSE + NB = ILAENV( 1, 'SORGQR', ' ', N-1, N-1, N-1, -1 ) + END IF + LWKOPT = MAX( 1, N-1 )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to SSYTRD with UPLO = 'U' +* +* Shift the vectors which define the elementary reflectors one +* column to the left, and set the last row and column of Q to +* those of the unit matrix +* + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + A( I, J ) = A( I, J+1 ) + 10 CONTINUE + A( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + A( I, N ) = ZERO + 30 CONTINUE + A( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL SORGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* Q was determined by a call to SSYTRD with UPLO = 'L'. +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q to +* those of the unit matrix +* + DO 50 J = N, 2, -1 + A( 1, J ) = ZERO + DO 40 I = J + 1, N + A( I, J ) = A( I, J-1 ) + 40 CONTINUE + 50 CONTINUE + A( 1, 1 ) = ONE + DO 60 I = 2, N + A( I, 1 ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL SORGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORGTR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sorm2r.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sorm2r.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,197 @@ + SUBROUTINE SORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. 
+* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORM2R overwrites the general real m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'T', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'T', +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'T': apply Q' (Transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) REAL array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* SGEQRF in the first k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEQRF. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. 
+* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORM2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. 
NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( SIDE, MI, NI, A( I, I ), 1, TAU( I ), C( IC, JC ), + $ LDC, WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of SORM2R +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sormbr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sormbr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,282 @@ + SUBROUTINE SORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, VECT + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* If VECT = 'Q', SORMBR overwrites the general real M-by-N matrix C +* with +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C +* with +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': P * C C * P +* TRANS = 'T': P**T * C C * P**T +* +* Here Q and P**T are the orthogonal matrices determined by SGEBRD when +* reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and +* P**T are defined as products of elementary reflectors H(i) and G(i) +* respectively. +* +* Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the +* order of the orthogonal matrix Q or P**T that is applied. +* +* If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: +* if nq >= k, Q = H(1) H(2) . . . 
H(k); +* if nq < k, Q = H(1) H(2) . . . H(nq-1). +* +* If VECT = 'P', A is assumed to have been a K-by-NQ matrix: +* if k < nq, P = G(1) G(2) . . . G(k); +* if k >= nq, P = G(1) G(2) . . . G(nq-1). +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* = 'Q': apply Q or Q**T; +* = 'P': apply P or P**T. +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q, Q**T, P or P**T from the Left; +* = 'R': apply Q, Q**T, P or P**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q or P; +* = 'T': Transpose, apply Q**T or P**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* If VECT = 'Q', the number of columns in the original +* matrix reduced by SGEBRD. +* If VECT = 'P', the number of rows in the original +* matrix reduced by SGEBRD. +* K >= 0. +* +* A (input) REAL array, dimension +* (LDA,min(nq,K)) if VECT = 'Q' +* (LDA,nq) if VECT = 'P' +* The vectors which define the elementary reflectors H(i) and +* G(i), whose products determine the matrices Q and P, as +* returned by SGEBRD. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If VECT = 'Q', LDA >= max(1,nq); +* if VECT = 'P', LDA >= max(1,min(nq,K)). +* +* TAU (input) REAL array, dimension (min(nq,K)) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i) or G(i) which determines Q or P, as returned +* by SGEBRD in the array argument TAUQ or TAUP. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q +* or P*C or P**T*C or C*P or C*P**T. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. 
+* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORMLQ, SORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + APPLYQ = LSAME( VECT, 'Q' ) + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q or P and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( K.LT.0 ) THEN + INFO = -6 + ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR. + $ ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) ) + $ THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. 
.NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( APPLYQ ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'SORMLQ', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'SORMLQ', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + WORK( 1 ) = 1 + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( APPLYQ ) THEN +* +* Apply Q +* + IF( NQ.GE.K ) THEN +* +* Q was determined by a call to SGEBRD with nq >= k +* + CALL SORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* Q was determined by a call to SGEBRD with nq < k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL SORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + ELSE +* +* Apply P +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF + IF( NQ.GT.K ) THEN +* +* P was determined by a call to SGEBRD with nq > k +* + CALL SORMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* P was determined by a call to SGEBRD with nq <= k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL SORMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA, + $ TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMBR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sorml2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sorml2.f Sun Apr 27 22:34:17 
2008 +0200 @@ -0,0 +1,197 @@ + SUBROUTINE SORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORML2 overwrites the general real m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'T', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'T', +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'T': apply Q' (Transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) REAL array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* SGELQF in the first k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGELQF. 
+* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORML2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. 
.NOT.NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( SIDE, MI, NI, A( I, I ), LDA, TAU( I ), + $ C( IC, JC ), LDC, WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of SORML2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sormlq.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sormlq.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,268 @@ + SUBROUTINE SORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORMLQ overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by SGELQF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. 
+* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) REAL array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* SGELQF in the first k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGELQF. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. 
+ LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + REAL T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORML2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'SORMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. 
NB.GE.K ) THEN +* +* Use unblocked code +* + CALL SORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL SLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, + $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMLQ +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sormqr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sormqr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,261 @@ + SUBROUTINE SORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORMQR overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . 
H(k) +* +* as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) REAL array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* SGEQRF in the first k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEQRF. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. 
+* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + REAL T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORM2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. 
+* + NB = MIN( NBMAX, ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL SORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL SLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, + $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMQR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sormr3.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sormr3.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,206 @@ + SUBROUTINE SORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORMR3 overwrites the general real m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'T', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'T', +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by STZRZF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'T': apply Q' (Transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing +* the meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* A (input) REAL array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* STZRZF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by STZRZF. 
+* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JA, JC, MI, NI, NQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMR3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. 
NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JA = M - L + 1 + JC = 1 + ELSE + MI = M + JA = N - L + 1 + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)' +* + CALL SLARZ( SIDE, MI, NI, L, A( I, JA ), LDA, TAU( I ), + $ C( IC, JC ), LDC, WORK ) +* + 10 CONTINUE +* + RETURN +* +* End of SORMR3 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sormrz.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sormrz.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,292 @@ + SUBROUTINE SORMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* January 2007 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORMRZ overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by STZRZF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. 
+* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing +* the meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* A (input) REAL array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* STZRZF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by STZRZF. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. 
Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC, + $ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + REAL T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLARZB, SLARZT, SORMR3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE +* +* Determine the block size. NB may be at most NBMAX, where +* NBMAX is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'SORMRQ', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, NW ) .AND. 
.NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMRZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL SORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + JA = M - L + 1 + ELSE + MI = M + IC = 1 + JA = N - L + 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA, + $ TAU( I ), T, LDT ) +* + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL SLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ), + $ LDC, WORK, LDWORK ) + 10 CONTINUE +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SORMRZ +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/spbcon.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/spbcon.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,192 @@ + SUBROUTINE SPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. +* November 2006 +* +* Modified to call SLACN2 in place of SLACON, 7 Feb 03, SJH. +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SPBCON estimates the reciprocal of the condition number (in the +* 1-norm) of a real symmetric positive definite band matrix using the +* Cholesky factorization A = U**T*U or A = L*L**T computed by SPBTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor stored in AB; +* = 'L': Lower triangular factor stored in AB. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T of the band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of U or L is +* stored in the j-th column of the array AB as follows: +* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* ANORM (input) REAL +* The 1-norm (or infinity-norm) of the symmetric band matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. 
+* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SLATBS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL SLATBS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, WORK( 2*N+1 ), + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL SLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, WORK( 2*N+1 ), + $ INFO ) + ELSE +* +* Multiply by inv(L). 
+* + CALL SLATBS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, WORK( 2*N+1 ), + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL SLATBS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, WORK( 2*N+1 ), + $ INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE +* + RETURN +* +* End of SPBCON +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/spbtf2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/spbtf2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,194 @@ + SUBROUTINE SPBTF2( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* SPBTF2 computes the Cholesky factorization of a real symmetric +* positive definite band matrix A. +* +* The factorization has the form +* A = U' * U , if UPLO = 'U', or +* A = L * L', if UPLO = 'L', +* where U is an upper triangular matrix, U' is the transpose of U, and +* L is lower triangular. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. 
+* +* KD (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U'*U or A = L*L' of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite, and the factorization could not be +* completed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KN + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. 
+ EXTERNAL SSCAL, SSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = AB( KD+1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 30 + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ +* +* Compute elements J+1:J+KN of row J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL SSCAL( KN, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL SSYR( 'Upper', KN, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = AB( 1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 30 + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ +* +* Compute elements J+1:J+KN of column J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL SSCAL( KN, ONE / AJJ, AB( 2, J ), 1 ) + CALL SSYR( 'Lower', KN, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 20 CONTINUE + END IF + RETURN +* + 30 CONTINUE + INFO = J + RETURN +* +* End of SPBTF2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/spbtrf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/spbtrf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,364 @@ + SUBROUTINE SPBTRF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. 
of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* SPBTRF computes the Cholesky factorization of a real symmetric +* positive definite band matrix A. +* +* The factorization has the form +* A = U**T * U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U**T*U or A = L*L**T of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. 
+* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* Contributed by +* Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 32, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, IB, II, J, JJ, NB +* .. +* .. Local Arrays .. + REAL WORK( LDWORK, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SPBTF2, SPOTF2, SSYRK, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( UPLO, 'U' ) ) .AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'SPBTRF', UPLO, N, KD, -1, -1 ) +* +* The block size must not exceed the semi-bandwidth KD, and must not +* exceed the limit set by the size of the local array WORK. 
+* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KD ) THEN +* +* Use unblocked code +* + CALL SPBTF2( UPLO, N, KD, AB, LDAB, INFO ) + ELSE +* +* Use blocked code +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Compute the Cholesky factorization of a symmetric band +* matrix, given the upper triangle of the matrix in band +* storage. +* +* Zero the upper triangle of the work array. +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 70 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL SPOTF2( UPLO, IB, AB( KD+1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 A12 A13 +* A22 A23 +* A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A12, A22 and +* A23 are empty if IB = KD. The upper triangle of A13 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A12 +* + CALL STRSM( 'Left', 'Upper', 'Transpose', + $ 'Non-unit', IB, I2, ONE, AB( KD+1, I ), + $ LDAB-1, AB( KD+1-IB, I+IB ), LDAB-1 ) +* +* Update A22 +* + CALL SSYRK( 'Upper', 'Transpose', I2, IB, -ONE, + $ AB( KD+1-IB, I+IB ), LDAB-1, ONE, + $ AB( KD+1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array. +* + DO 40 JJ = 1, I3 + DO 30 II = JJ, IB + WORK( II, JJ ) = AB( II-JJ+1, JJ+I+KD-1 ) + 30 CONTINUE + 40 CONTINUE +* +* Update A13 (in the work array). 
+* + CALL STRSM( 'Left', 'Upper', 'Transpose', + $ 'Non-unit', IB, I3, ONE, AB( KD+1, I ), + $ LDAB-1, WORK, LDWORK ) +* +* Update A23 +* + IF( I2.GT.0 ) + $ CALL SGEMM( 'Transpose', 'No Transpose', I2, I3, + $ IB, -ONE, AB( KD+1-IB, I+IB ), + $ LDAB-1, WORK, LDWORK, ONE, + $ AB( 1+IB, I+KD ), LDAB-1 ) +* +* Update A33 +* + CALL SSYRK( 'Upper', 'Transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( KD+1, I+KD ), + $ LDAB-1 ) +* +* Copy the lower triangle of A13 back into place. +* + DO 60 JJ = 1, I3 + DO 50 II = JJ, IB + AB( II-JJ+1, JJ+I+KD-1 ) = WORK( II, JJ ) + 50 CONTINUE + 60 CONTINUE + END IF + END IF + 70 CONTINUE + ELSE +* +* Compute the Cholesky factorization of a symmetric band +* matrix, given the lower triangle of the matrix in band +* storage. +* +* Zero the lower triangle of the work array. +* + DO 90 J = 1, NB + DO 80 I = J + 1, NB + WORK( I, J ) = ZERO + 80 CONTINUE + 90 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 140 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL SPOTF2( UPLO, IB, AB( 1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 +* A21 A22 +* A31 A32 A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A21, A22 and +* A32 are empty if IB = KD. The lower triangle of A31 +* lies outside the band. 
+* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A21 +* + CALL STRSM( 'Right', 'Lower', 'Transpose', + $ 'Non-unit', I2, IB, ONE, AB( 1, I ), + $ LDAB-1, AB( 1+IB, I ), LDAB-1 ) +* +* Update A22 +* + CALL SSYRK( 'Lower', 'No Transpose', I2, IB, -ONE, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the upper triangle of A31 into the work array. +* + DO 110 JJ = 1, IB + DO 100 II = 1, MIN( JJ, I3 ) + WORK( II, JJ ) = AB( KD+1-JJ+II, JJ+I-1 ) + 100 CONTINUE + 110 CONTINUE +* +* Update A31 (in the work array). +* + CALL STRSM( 'Right', 'Lower', 'Transpose', + $ 'Non-unit', I3, IB, ONE, AB( 1, I ), + $ LDAB-1, WORK, LDWORK ) +* +* Update A32 +* + IF( I2.GT.0 ) + $ CALL SGEMM( 'No transpose', 'Transpose', I3, I2, + $ IB, -ONE, WORK, LDWORK, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1+KD-IB, I+IB ), LDAB-1 ) +* +* Update A33 +* + CALL SSYRK( 'Lower', 'No Transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( 1, I+KD ), + $ LDAB-1 ) +* +* Copy the upper triangle of A31 back into place. +* + DO 130 JJ = 1, IB + DO 120 II = 1, MIN( JJ, I3 ) + AB( KD+1-JJ+II, JJ+I-1 ) = WORK( II, JJ ) + 120 CONTINUE + 130 CONTINUE + END IF + END IF + 140 CONTINUE + END IF + END IF + RETURN +* + 150 CONTINUE + RETURN +* +* End of SPBTRF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/spbtrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/spbtrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,145 @@ + SUBROUTINE SPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), B( LDB, * ) +* .. 
+* +* Purpose +* ======= +* +* SPBTRS solves a system of linear equations A*X = B with a symmetric +* positive definite band matrix A using the Cholesky factorization +* A = U**T*U or A = L*L**T computed by SPBTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor stored in AB; +* = 'L': Lower triangular factor stored in AB. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T of the band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of U or L is +* stored in the j-th column of the array AB as follows: +* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. 
.NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* + DO 10 J = 1, NRHS +* +* Solve U'*X = B, overwriting B with X. +* + CALL STBSV( 'Upper', 'Transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL STBSV( 'Upper', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L*L'. +* + DO 20 J = 1, NRHS +* +* Solve L*X = B, overwriting B with X. +* + CALL STBSV( 'Lower', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL STBSV( 'Lower', 'Transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of SPBTRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/spocon.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/spocon.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,177 @@ + SUBROUTINE SPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* Modified to call SLACN2 in place of SLACON, 7 Feb 03, SJH. +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), WORK( * ) +* .. 
+* +* Purpose +* ======= +* +* SPOCON estimates the reciprocal of the condition number (in the +* 1-norm) of a real symmetric positive definite matrix using the +* Cholesky factorization A = U**T*U or A = L*L**T computed by SPOTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T, as computed by SPOTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) REAL +* The 1-norm (or infinity-norm) of the symmetric matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SLATRS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. 
+* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of inv(A). +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL SLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL SLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL SLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL SLATRS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. 
+* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of SPOCON +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/spotri.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/spotri.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,96 @@ + SUBROUTINE SPOTRI( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SPOTRI computes the inverse of a real symmetric positive definite +* matrix A using the Cholesky factorization A = U**T*U or A = L*L**T +* computed by SPOTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T, as computed by +* SPOTRF. +* On exit, the upper or lower triangle of the (symmetric) +* inverse of A, overwriting the input factor U or L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the (i,i) element of the factor U or L is +* zero, and the inverse could not be computed. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLAUUM, STRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. 
.NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL STRTRI( UPLO, 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* Form inv(U)*inv(U)' or inv(L)'*inv(L). +* + CALL SLAUUM( UPLO, N, A, LDA, INFO ) +* + RETURN +* +* End of SPOTRI +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/spotrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/spotrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,132 @@ + SUBROUTINE SPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SPOTRS solves a system of linear equations A*X = B with a symmetric +* positive definite matrix A using the Cholesky factorization +* A = U**T*U or A = L*L**T computed by SPOTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T, as computed by SPOTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. 
+* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* +* Solve U'*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A*X = B where A = L*L'. +* +* Solve L*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) +* +* Solve L'*X = B, overwriting B with X. 
+* + CALL STRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) + END IF +* + RETURN +* +* End of SPOTRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sptsv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sptsv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,99 @@ + SUBROUTINE SPTSV( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL B( LDB, * ), D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* SPTSV computes the solution to a real system of linear equations +* A*X = B, where A is an N-by-N symmetric positive definite tridiagonal +* matrix, and X and B are N-by-NRHS matrices. +* +* A is factored as A = L*D*L**T, and the factored form of A is then +* used to solve the system of equations. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. On exit, the n diagonal elements of the diagonal matrix +* D from the factorization A = L*D*L**T. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A. On exit, the (n-1) subdiagonal elements of the +* unit bidiagonal factor L from the L*D*L**T factorization of +* A. (E can also be regarded as the superdiagonal of the unit +* bidiagonal factor U from the U**T*D*U factorization of A.) +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. 
LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the solution has not been +* computed. The factorization has not been completed +* unless i = N. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL SPTTRF, SPTTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPTSV ', -INFO ) + RETURN + END IF +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + CALL SPTTRF( N, D, E, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SPTTRS( N, NRHS, D, E, B, LDB, INFO ) + END IF + RETURN +* +* End of SPTSV +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/spttrf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/spttrf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,152 @@ + SUBROUTINE SPTTRF( N, D, E, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* SPTTRF computes the L*D*L' factorization of a real symmetric +* positive definite tridiagonal matrix A. The factorization may also +* be regarded as having the form A = U'*D*U. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. 
On exit, the n diagonal elements of the diagonal matrix +* D from the L*D*L' factorization of A. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A. On exit, the (n-1) subdiagonal elements of the +* unit bidiagonal factor L from the L*D*L' factorization of A. +* E can also be regarded as the superdiagonal of the unit +* bidiagonal factor U from the U'*D*U factorization of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite; if k < N, the factorization could not +* be completed, while if k = N, the factorization was +* completed, but D(N) <= 0. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I4 + REAL EI +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SPTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + I4 = MOD( N-1, 4 ) + DO 10 I = 1, I4 + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 30 + END IF + EI = E( I ) + E( I ) = EI / D( I ) + D( I+1 ) = D( I+1 ) - E( I )*EI + 10 CONTINUE +* + DO 20 I = I4 + 1, N - 4, 4 +* +* Drop out of the loop if d(i) <= 0: the matrix is not positive +* definite. +* + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 30 + END IF +* +* Solve for e(i) and d(i+1). +* + EI = E( I ) + E( I ) = EI / D( I ) + D( I+1 ) = D( I+1 ) - E( I )*EI +* + IF( D( I+1 ).LE.ZERO ) THEN + INFO = I + 1 + GO TO 30 + END IF +* +* Solve for e(i+1) and d(i+2). 
+* + EI = E( I+1 ) + E( I+1 ) = EI / D( I+1 ) + D( I+2 ) = D( I+2 ) - E( I+1 )*EI +* + IF( D( I+2 ).LE.ZERO ) THEN + INFO = I + 2 + GO TO 30 + END IF +* +* Solve for e(i+2) and d(i+3). +* + EI = E( I+2 ) + E( I+2 ) = EI / D( I+2 ) + D( I+3 ) = D( I+3 ) - E( I+2 )*EI +* + IF( D( I+3 ).LE.ZERO ) THEN + INFO = I + 3 + GO TO 30 + END IF +* +* Solve for e(i+3) and d(i+4). +* + EI = E( I+3 ) + E( I+3 ) = EI / D( I+3 ) + D( I+4 ) = D( I+4 ) - E( I+3 )*EI + 20 CONTINUE +* +* Check d(n) for positive definiteness. +* + IF( D( N ).LE.ZERO ) + $ INFO = N +* + 30 CONTINUE + RETURN +* +* End of SPTTRF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/spttrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/spttrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,114 @@ + SUBROUTINE SPTTRS( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL B( LDB, * ), D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* SPTTRS solves a tridiagonal system of the form +* A * X = B +* using the L*D*L' factorization of A computed by SPTTRF. D is a +* diagonal matrix specified in the vector D, L is a unit bidiagonal +* matrix whose subdiagonal is specified in the vector E, and X and B +* are N by NRHS matrices. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the tridiagonal matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* L*D*L' factorization of A. +* +* E (input) REAL array, dimension (N-1) +* The (n-1) subdiagonal elements of the unit bidiagonal factor +* L from the L*D*L' factorization of A. 
E can also be regarded +* as the superdiagonal of the unit bidiagonal factor U from the +* factorization A = U'*D*U. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side vectors B for the system of +* linear equations. +* On exit, the solution vectors, X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SPTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'SPTTRS', ' ', N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL SPTTS2( N, NRHS, D, E, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL SPTTS2( N, JB, D, E, B( 1, J ), LDB ) + 10 CONTINUE + END IF +* + RETURN +* +* End of SPTTRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/sptts2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/sptts2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,93 @@ + SUBROUTINE SPTTS2( N, NRHS, D, E, B, LDB ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER LDB, N, NRHS +* .. +* .. 
Array Arguments .. + REAL B( LDB, * ), D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* SPTTS2 solves a tridiagonal system of the form +* A * X = B +* using the L*D*L' factorization of A computed by SPTTRF. D is a +* diagonal matrix specified in the vector D, L is a unit bidiagonal +* matrix whose subdiagonal is specified in the vector E, and X and B +* are N by NRHS matrices. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the tridiagonal matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* L*D*L' factorization of A. +* +* E (input) REAL array, dimension (N-1) +* The (n-1) subdiagonal elements of the unit bidiagonal factor +* L from the L*D*L' factorization of A. E can also be regarded +* as the superdiagonal of the unit bidiagonal factor U from the +* factorization A = U'*D*U. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side vectors B for the system of +* linear equations. +* On exit, the solution vectors, X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Subroutines .. + EXTERNAL SSCAL +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) + $ CALL SSCAL( NRHS, 1. / D( 1 ), B, LDB ) + RETURN + END IF +* +* Solve A * X = B using the factorization A = L*D*L', +* overwriting each right hand side vector with its solution. +* + DO 30 J = 1, NRHS +* +* Solve L * x = b. +* + DO 10 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 10 CONTINUE +* +* Solve D * L' * x = b. 
+* + B( N, J ) = B( N, J ) / D( N ) + DO 20 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - B( I+1, J )*E( I ) + 20 CONTINUE + 30 CONTINUE +* + RETURN +* +* End of SPTTS2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/srscl.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/srscl.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,114 @@ + SUBROUTINE SRSCL( N, SA, SX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL SA +* .. +* .. Array Arguments .. + REAL SX( * ) +* .. +* +* Purpose +* ======= +* +* SRSCL multiplies an n-element real vector x by the real scalar 1/a. +* This is done without overflow or underflow as long as +* the final result x/a does not overflow or underflow. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of components of the vector x. +* +* SA (input) REAL +* The scalar a which is used to divide each component of x. +* SA must be >= 0, or the subroutine will divide by zero. +* +* SX (input/output) REAL array, dimension +* (1+(N-1)*abs(INCX)) +* The n-element vector x. +* +* INCX (input) INTEGER +* The increment between successive values of the vector SX. +* > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + REAL BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLABAD, SSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. 
+* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Initialize the denominator to SA and the numerator to 1. +* + CDEN = SA + CNUM = ONE +* + 10 CONTINUE + CDEN1 = CDEN*SMLNUM + CNUM1 = CNUM / BIGNUM + IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN +* +* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM. +* + MUL = SMLNUM + DONE = .FALSE. + CDEN = CDEN1 + ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN +* +* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM. +* + MUL = BIGNUM + DONE = .FALSE. + CNUM = CNUM1 + ELSE +* +* Multiply X by CNUM / CDEN and return. +* + MUL = CNUM / CDEN + DONE = .TRUE. + END IF +* +* Scale the vector X by MUL +* + CALL SSCAL( N, MUL, SX, INCX ) +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of SRSCL +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/ssteqr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/ssteqr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,500 @@ + SUBROUTINE SSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSTEQR computes all eigenvalues and, optionally, eigenvectors of a +* symmetric tridiagonal matrix using the implicit QL or QR method. +* The eigenvectors of a full or band symmetric matrix can also be found +* if SSYTRD or SSPTRD or SSBTRD has been used to reduce this matrix to +* tridiagonal form. +* +* Arguments +* ========= +* +* COMPZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only. +* = 'V': Compute eigenvalues and eigenvectors of the original +* symmetric matrix. 
On entry, Z must contain the +* orthogonal matrix used to reduce the original matrix +* to tridiagonal form. +* = 'I': Compute eigenvalues and eigenvectors of the +* tridiagonal matrix. Z is initialized to the identity +* matrix. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the diagonal elements of the tridiagonal matrix. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix. +* On exit, E has been destroyed. +* +* Z (input/output) REAL array, dimension (LDZ, N) +* On entry, if COMPZ = 'V', then Z contains the orthogonal +* matrix used in the reduction to tridiagonal form. +* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +* orthonormal eigenvectors of the original symmetric matrix, +* and if COMPZ = 'I', Z contains the orthonormal eigenvectors +* of the symmetric tridiagonal matrix. +* If COMPZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* eigenvectors are desired, then LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (max(1,2*N-2)) +* If COMPZ = 'N', then WORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm has failed to find all the eigenvalues in +* a total of 30*N iterations; if INFO = i, then i +* elements of E have not converged to zero; on exit, D +* and E contain the elements of a symmetric tridiagonal +* matrix which is orthogonally similar to the original +* matrix. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ THREE = 3.0E0 ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. 
+ INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, + $ LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, + $ NM1, NMAXIT + REAL ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, + $ S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST, SLAPY2 + EXTERNAL LSAME, SLAMCH, SLANST, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL SLAE2, SLAEV2, SLARTG, SLASCL, SLASET, SLASR, + $ SLASRT, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.EQ.2 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Determine the unit roundoff and over/underflow thresholds. +* + EPS = SLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues and eigenvectors of the tridiagonal +* matrix. +* + IF( ICOMPZ.EQ.2 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* + NMAXIT = N*MAXIT + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. 
+* + L1 = 1 + NM1 = N - 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 160 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + IF( L1.LE.NM1 ) THEN + DO 20 M = L1, NM1 + TST = ABS( E( M ) ) + IF( TST.EQ.ZERO ) + $ GO TO 30 + IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + END IF + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = SLANST( 'I', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GT.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 40 CONTINUE + IF( L.NE.LEND ) THEN + LENDM1 = LEND - 1 + DO 50 M = L, LENDM1 + TST = ABS( E( M ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ + $ SAFMIN )GO TO 60 + 50 CONTINUE + END IF +* + M = LEND +* + 60 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 80 +* +* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 +* to compute its eigensystem. 
+* + IF( M.EQ.L+1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL SLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) + WORK( L ) = C + WORK( N-1+L ) = S + CALL SLASR( 'R', 'V', 'B', N, 2, WORK( L ), + $ WORK( N-1+L ), Z( 1, L ), LDZ ) + ELSE + CALL SLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 ) + END IF + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L+1 )-P ) / ( TWO*E( L ) ) + R = SLAPY2( G, ONE ) + G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + MM1 = M - 1 + DO 70 I = MM1, L, -1 + F = S*E( I ) + B = C*E( I ) + CALL SLARTG( G, F, C, S, R ) + IF( I.NE.M-1 ) + $ E( I+1 ) = R + G = D( I+1 ) - P + R = ( D( I )-G )*S + TWO*C*B + P = S*R + D( I+1 ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = -S + END IF +* + 70 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = M - L + 1 + CALL SLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ), + $ Z( 1, L ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( L ) = G + GO TO 40 +* +* Eigenvalue found. +* + 80 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 90 CONTINUE + IF( L.NE.LEND ) THEN + LENDP1 = LEND + 1 + DO 100 M = L, LENDP1, -1 + TST = ABS( E( M-1 ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ + $ SAFMIN )GO TO 110 + 100 CONTINUE + END IF +* + M = LEND +* + 110 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 130 +* +* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 +* to compute its eigensystem. 
+* + IF( M.EQ.L-1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL SLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) + WORK( M ) = C + WORK( N-1+M ) = S + CALL SLASR( 'R', 'V', 'F', N, 2, WORK( M ), + $ WORK( N-1+M ), Z( 1, L-1 ), LDZ ) + ELSE + CALL SLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 ) + END IF + D( L-1 ) = RT1 + D( L ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) + R = SLAPY2( G, ONE ) + G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + LM1 = L - 1 + DO 120 I = M, LM1 + F = S*E( I ) + B = C*E( I ) + CALL SLARTG( G, F, C, S, R ) + IF( I.NE.M ) + $ E( I-1 ) = R + G = D( I ) - P + R = ( D( I+1 )-G )*S + TWO*C*B + P = S*R + D( I ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = S + END IF +* + 120 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = L - M + 1 + CALL SLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ), + $ Z( 1, M ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( LM1 ) = G + GO TO 90 +* +* Eigenvalue found. +* + 130 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 +* + END IF +* +* Undo scaling if necessary +* + 140 CONTINUE + IF( ISCALE.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + ELSE IF( ISCALE.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + END IF +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. 
+* + IF( JTOT.LT.NMAXIT ) + $ GO TO 10 + DO 150 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 150 CONTINUE + GO TO 190 +* +* Order eigenvalues and eigenvectors. +* + 160 CONTINUE + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL SLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 180 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 170 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 170 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 180 CONTINUE + END IF +* + 190 CONTINUE + RETURN +* +* End of SSTEQR +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/ssterf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/ssterf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,364 @@ + SUBROUTINE SSTERF( N, D, E, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* SSTERF computes all eigenvalues of a symmetric tridiagonal matrix +* using the Pal-Walker-Kahan variant of the QL or QR algorithm. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix. +* On exit, E has been destroyed. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm failed to find all of the eigenvalues in +* a total of 30*N iterations; if INFO = i, then i +* elements of E have not converged to zero. 
+* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ THREE = 3.0E0 ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ISCALE, JTOT, L, L1, LEND, LENDSV, LSV, M, + $ NMAXIT + REAL ALPHA, ANORM, BB, C, EPS, EPS2, GAMMA, OLDC, + $ OLDGAM, P, R, RT1, RT2, RTE, S, SAFMAX, SAFMIN, + $ SIGMA, SSFMAX, SSFMIN +* .. +* .. External Functions .. + REAL SLAMCH, SLANST, SLAPY2 + EXTERNAL SLAMCH, SLANST, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL SLAE2, SLASCL, SLASRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SSTERF', -INFO ) + RETURN + END IF + IF( N.LE.1 ) + $ RETURN +* +* Determine the unit roundoff for this environment. +* + EPS = SLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues of the tridiagonal matrix. +* + NMAXIT = N*MAXIT + SIGMA = ZERO + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. 
+* + L1 = 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 170 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + DO 20 M = L1, N - 1 + IF( ABS( E( M ) ).LE.( SQRT( ABS( D( M ) ) )* + $ SQRT( ABS( D( M+1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = SLANST( 'I', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* + DO 40 I = L, LEND - 1 + E( I ) = E( I )**2 + 40 CONTINUE +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GE.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 50 CONTINUE + IF( L.NE.LEND ) THEN + DO 60 M = L, LEND - 1 + IF( ABS( E( M ) ).LE.EPS2*ABS( D( M )*D( M+1 ) ) ) + $ GO TO 70 + 60 CONTINUE + END IF + M = LEND +* + 70 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 90 +* +* If remaining matrix is 2 by 2, use SLAE2 to compute its +* eigenvalues. +* + IF( M.EQ.L+1 ) THEN + RTE = SQRT( E( L ) ) + CALL SLAE2( D( L ), RTE, D( L+1 ), RT1, RT2 ) + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 50 + GO TO 150 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 150 + JTOT = JTOT + 1 +* +* Form shift. 
+* + RTE = SQRT( E( L ) ) + SIGMA = ( D( L+1 )-P ) / ( TWO*RTE ) + R = SLAPY2( SIGMA, ONE ) + SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) ) +* + C = ONE + S = ZERO + GAMMA = D( M ) - SIGMA + P = GAMMA*GAMMA +* +* Inner loop +* + DO 80 I = M - 1, L, -1 + BB = E( I ) + R = P + BB + IF( I.NE.M-1 ) + $ E( I+1 ) = S*R + OLDC = C + C = P / R + S = BB / R + OLDGAM = GAMMA + ALPHA = D( I ) + GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM + D( I+1 ) = OLDGAM + ( ALPHA-GAMMA ) + IF( C.NE.ZERO ) THEN + P = ( GAMMA*GAMMA ) / C + ELSE + P = OLDC*BB + END IF + 80 CONTINUE +* + E( L ) = S*P + D( L ) = SIGMA + GAMMA + GO TO 50 +* +* Eigenvalue found. +* + 90 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 50 + GO TO 150 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 100 CONTINUE + DO 110 M = L, LEND + 1, -1 + IF( ABS( E( M-1 ) ).LE.EPS2*ABS( D( M )*D( M-1 ) ) ) + $ GO TO 120 + 110 CONTINUE + M = LEND +* + 120 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 140 +* +* If remaining matrix is 2 by 2, use SLAE2 to compute its +* eigenvalues. +* + IF( M.EQ.L-1 ) THEN + RTE = SQRT( E( L-1 ) ) + CALL SLAE2( D( L ), RTE, D( L-1 ), RT1, RT2 ) + D( L ) = RT1 + D( L-1 ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 100 + GO TO 150 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 150 + JTOT = JTOT + 1 +* +* Form shift. 
+* + RTE = SQRT( E( L-1 ) ) + SIGMA = ( D( L-1 )-P ) / ( TWO*RTE ) + R = SLAPY2( SIGMA, ONE ) + SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) ) +* + C = ONE + S = ZERO + GAMMA = D( M ) - SIGMA + P = GAMMA*GAMMA +* +* Inner loop +* + DO 130 I = M, L - 1 + BB = E( I ) + R = P + BB + IF( I.NE.M ) + $ E( I-1 ) = S*R + OLDC = C + C = P / R + S = BB / R + OLDGAM = GAMMA + ALPHA = D( I+1 ) + GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM + D( I ) = OLDGAM + ( ALPHA-GAMMA ) + IF( C.NE.ZERO ) THEN + P = ( GAMMA*GAMMA ) / C + ELSE + P = OLDC*BB + END IF + 130 CONTINUE +* + E( L-1 ) = S*P + D( L ) = SIGMA + GAMMA + GO TO 100 +* +* Eigenvalue found. +* + 140 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 100 + GO TO 150 +* + END IF +* +* Undo scaling if necessary +* + 150 CONTINUE + IF( ISCALE.EQ.1 ) + $ CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + IF( ISCALE.EQ.2 ) + $ CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.LT.NMAXIT ) + $ GO TO 10 + DO 160 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 160 CONTINUE + GO TO 180 +* +* Sort eigenvalues in increasing order. +* + 170 CONTINUE + CALL SLASRT( 'I', N, D, INFO ) +* + 180 CONTINUE + RETURN +* +* End of SSTERF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/ssyev.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/ssyev.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,211 @@ + SUBROUTINE SSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), W( * ), WORK( * ) +* .. 
+* +* Purpose +* ======= +* +* SSYEV computes all eigenvalues and, optionally, eigenvectors of a +* real symmetric matrix A. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* orthonormal eigenvectors of the matrix A. +* If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +* or the upper triangle (if UPLO='U') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,3*N-1). +* For optimal efficiency, LWORK >= (NB+2)*N, +* where NB is the blocksize for SSYTRD returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. 
+* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWKOPT, NB + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSY + EXTERNAL ILAENV, LSAME, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SLASCL, SORGTR, SSCAL, SSTEQR, SSTERF, SSYTRD, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB+2 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, 3*N-1 ) .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 2 + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. 
+* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL SLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call SSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL SSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* SORGTR to generate the orthogonal matrix, then call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SORGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + CALL SSTEQR( JOBZ, N, W, WORK( INDE ), A, LDA, WORK( INDTAU ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYEV +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/ssytd2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/ssytd2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,247 @@ + SUBROUTINE SSYTD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAU( * ) +* .. 
+* +* Purpose +* ======= +* +* SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal +* form T by an orthogonal similarity transformation: Q' * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the orthogonal +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the orthogonal matrix Q as a product +* of elementary reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* D (output) REAL array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) REAL array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). 
+* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +* A(1:i-1,i+1), and tau in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +* and tau in TAU(i). +* +* The contents of A on exit are illustrated by the following examples +* with n = 5: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( d e v2 v3 v4 ) ( d ) +* ( d e v3 v4 ) ( e d ) +* ( d e v4 ) ( v1 e d ) +* ( d e ) ( v1 v2 e d ) +* ( d ) ( v1 v2 v3 e d ) +* +* where d and e denote diagonal and off-diagonal elements of T, and vi +* denotes an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, HALF + PARAMETER ( ONE = 1.0, ZERO = 0.0, HALF = 1.0 / 2.0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + REAL ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SLARFG, SSYMV, SSYR2, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. 
.NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTD2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A +* + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(1:i-1,i+1) +* + CALL SLARFG( I, A( I, I+1 ), A( 1, I+1 ), 1, TAUI ) + E( I ) = A( I, I+1 ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + A( I, I+1 ) = ONE +* +* Compute x := tau * A * v storing x in TAU(1:i) +* + CALL SSYMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO, + $ TAU, 1 ) +* +* Compute w := x - 1/2 * tau * (x'*v) * v +* + ALPHA = -HALF*TAUI*SDOT( I, TAU, 1, A( 1, I+1 ), 1 ) + CALL SAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL SSYR2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A, + $ LDA ) +* + A( I, I+1 ) = E( I ) + END IF + D( I+1 ) = A( I+1, I+1 ) + TAU( I ) = TAUI + 10 CONTINUE + D( 1 ) = A( 1, 1 ) + ELSE +* +* Reduce the lower triangle of A +* + DO 20 I = 1, N - 1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(i+2:n,i) +* + CALL SLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAUI ) + E( I ) = A( I+1, I ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + A( I+1, I ) = ONE +* +* Compute x := tau * A * v storing y in TAU(i:n-1) +* + CALL SSYMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, TAU( I ), 1 ) +* +* Compute w := x - 1/2 * tau * (x'*v) * v +* + ALPHA = -HALF*TAUI*SDOT( N-I, TAU( I ), 1, A( I+1, I ), + $ 1 ) + CALL SAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL SSYR2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1, + $ A( 
I+1, I+1 ), LDA ) +* + A( I+1, I ) = E( I ) + END IF + D( I ) = A( I, I ) + TAU( I ) = TAUI + 20 CONTINUE + D( N ) = A( N, N ) + END IF +* + RETURN +* +* End of SSYTD2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/ssytrd.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/ssytrd.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,294 @@ + SUBROUTINE SSYTRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SSYTRD reduces a real symmetric matrix A to real symmetric +* tridiagonal form T by an orthogonal similarity transformation: +* Q**T * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. 
+* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the orthogonal +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the orthogonal matrix Q as a product +* of elementary reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* D (output) REAL array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) REAL array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). 
+* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +* A(1:i-1,i+1), and tau in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +* and tau in TAU(i). +* +* The contents of A on exit are illustrated by the following examples +* with n = 5: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( d e v2 v3 v4 ) ( d ) +* ( d e v3 v4 ) ( e d ) +* ( d e v4 ) ( v1 e d ) +* ( d e ) ( v1 v2 e d ) +* ( d ) ( v1 v2 v3 e d ) +* +* where d and e denote diagonal and off-diagonal elements of T, and vi +* denotes an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLATRD, SSYR2K, SSYTD2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. 
+* + NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NX = N + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.N ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'SSYTRD', UPLO, N, -1, -1, -1 ) ) + IF( NX.LT.N ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code by setting NX = N. +* + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = ILAENV( 2, 'SSYTRD', UPLO, N, -1, -1, -1 ) + IF( NB.LT.NBMIN ) + $ NX = N + END IF + ELSE + NX = N + END IF + ELSE + NB = 1 + END IF +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* Columns 1:kk are handled by the unblocked method. 
+* + KK = N - ( ( N-NX+NB-1 ) / NB )*NB + DO 20 I = N - NB + 1, KK + 1, -NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL SLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK, + $ LDWORK ) +* +* Update the unreduced submatrix A(1:i-1,1:i-1), using an +* update of the form: A := A - V*W' - W*V' +* + CALL SSYR2K( UPLO, 'No transpose', I-1, NB, -ONE, A( 1, I ), + $ LDA, WORK, LDWORK, ONE, A, LDA ) +* +* Copy superdiagonal elements back into A, and diagonal +* elements into D +* + DO 10 J = I, I + NB - 1 + A( J-1, J ) = E( J-1 ) + D( J ) = A( J, J ) + 10 CONTINUE + 20 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL SSYTD2( UPLO, KK, A, LDA, D, E, TAU, IINFO ) + ELSE +* +* Reduce the lower triangle of A +* + DO 40 I = 1, N - NX, NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL SLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ), + $ TAU( I ), WORK, LDWORK ) +* +* Update the unreduced submatrix A(i+ib:n,i+ib:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL SSYR2K( UPLO, 'No transpose', N-I-NB+1, NB, -ONE, + $ A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE, + $ A( I+NB, I+NB ), LDA ) +* +* Copy subdiagonal elements back into A, and diagonal +* elements into D +* + DO 30 J = I, I + NB - 1 + A( J+1, J ) = E( J ) + D( J ) = A( J, J ) + 30 CONTINUE + 40 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL SSYTD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAU( I ), IINFO ) + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of SSYTRD +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/stgevc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/stgevc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,1147 @@ + SUBROUTINE STGEVC( SIDE, HOWMNY, SELECT, N, S, LDS, P, LDP, VL, + $ LDVL, VR, LDVR, 
MM, M, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDP, LDS, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL P( LDP, * ), S( LDS, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* +* Purpose +* ======= +* +* STGEVC computes some or all of the right and/or left eigenvectors of +* a pair of real matrices (S,P), where S is a quasi-triangular matrix +* and P is upper triangular. Matrix pairs of this type are produced by +* the generalized Schur factorization of a matrix pair (A,B): +* +* A = Q*S*Z**T, B = Q*P*Z**T +* +* as computed by SGGHRD + SHGEQZ. +* +* The right eigenvector x and the left eigenvector y of (S,P) +* corresponding to an eigenvalue w are defined by: +* +* S*x = w*P*x, (y**H)*S = w*(y**H)*P, +* +* where y**H denotes the conjugate tranpose of y. +* The eigenvalues are not input to this routine, but are computed +* directly from the diagonal blocks of S and P. +* +* This routine returns the matrices X and/or Y of right and left +* eigenvectors of (S,P), or the products Z*X and/or Q*Y, +* where Z and Q are input matrices. +* If Q and Z are the orthogonal factors from the generalized Schur +* factorization of a matrix pair (A,B), then Z*X and Q*Y +* are the matrices of right and left eigenvectors of (A,B). +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute all right and/or left eigenvectors; +* = 'B': compute all right and/or left eigenvectors, +* backtransformed by the matrices in VR and/or VL; +* = 'S': compute selected right and/or left eigenvectors, +* specified by the logical array SELECT. 
+* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY='S', SELECT specifies the eigenvectors to be +* computed. If w(j) is a real eigenvalue, the corresponding +* real eigenvector is computed if SELECT(j) is .TRUE.. +* If w(j) and w(j+1) are the real and imaginary parts of a +* complex eigenvalue, the corresponding complex eigenvector +* is computed if either SELECT(j) or SELECT(j+1) is .TRUE., +* and on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is +* set to .FALSE.. +* Not referenced if HOWMNY = 'A' or 'B'. +* +* N (input) INTEGER +* The order of the matrices S and P. N >= 0. +* +* S (input) REAL array, dimension (LDS,N) +* The upper quasi-triangular matrix S from a generalized Schur +* factorization, as computed by SHGEQZ. +* +* LDS (input) INTEGER +* The leading dimension of array S. LDS >= max(1,N). +* +* P (input) REAL array, dimension (LDP,N) +* The upper triangular matrix P from a generalized Schur +* factorization, as computed by SHGEQZ. +* 2-by-2 diagonal blocks of P corresponding to 2-by-2 blocks +* of S must be in positive diagonal form. +* +* LDP (input) INTEGER +* The leading dimension of array P. LDP >= max(1,N). +* +* VL (input/output) REAL array, dimension (LDVL,MM) +* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +* contain an N-by-N matrix Q (usually the orthogonal matrix Q +* of left Schur vectors returned by SHGEQZ). +* On exit, if SIDE = 'L' or 'B', VL contains: +* if HOWMNY = 'A', the matrix Y of left eigenvectors of (S,P); +* if HOWMNY = 'B', the matrix Q*Y; +* if HOWMNY = 'S', the left eigenvectors of (S,P) specified by +* SELECT, stored consecutively in the columns of +* VL, in the same order as their eigenvalues. +* +* A complex eigenvector corresponding to a complex eigenvalue +* is stored in two consecutive columns, the first holding the +* real part, and the second the imaginary part. +* +* Not referenced if SIDE = 'R'. +* +* LDVL (input) INTEGER +* The leading dimension of array VL. 
LDVL >= 1, and if +* SIDE = 'L' or 'B', LDVL >= N. +* +* VR (input/output) REAL array, dimension (LDVR,MM) +* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +* contain an N-by-N matrix Z (usually the orthogonal matrix Z +* of right Schur vectors returned by SHGEQZ). +* +* On exit, if SIDE = 'R' or 'B', VR contains: +* if HOWMNY = 'A', the matrix X of right eigenvectors of (S,P); +* if HOWMNY = 'B' or 'b', the matrix Z*X; +* if HOWMNY = 'S' or 's', the right eigenvectors of (S,P) +* specified by SELECT, stored consecutively in the +* columns of VR, in the same order as their +* eigenvalues. +* +* A complex eigenvector corresponding to a complex eigenvalue +* is stored in two consecutive columns, the first holding the +* real part and the second the imaginary part. +* +* Not referenced if SIDE = 'L'. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1, and if +* SIDE = 'R' or 'B', LDVR >= N. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR actually +* used to store the eigenvectors. If HOWMNY = 'A' or 'B', M +* is set to N. Each selected real eigenvector occupies one +* column and each selected complex eigenvector occupies two +* columns. +* +* WORK (workspace) REAL array, dimension (6*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the 2-by-2 block (INFO:INFO+1) does not have a complex +* eigenvalue. 
+* +* Further Details +* =============== +* +* Allocation of workspace: +* ---------- -- --------- +* +* WORK( j ) = 1-norm of j-th column of A, above the diagonal +* WORK( N+j ) = 1-norm of j-th column of B, above the diagonal +* WORK( 2*N+1:3*N ) = real part of eigenvector +* WORK( 3*N+1:4*N ) = imaginary part of eigenvector +* WORK( 4*N+1:5*N ) = real part of back-transformed eigenvector +* WORK( 5*N+1:6*N ) = imaginary part of back-transformed eigenvector +* +* Rowwise vs. columnwise solution methods: +* ------- -- ---------- -------- ------- +* +* Finding a generalized eigenvector consists basically of solving the +* singular triangular system +* +* (A - w B) x = 0 (for right) or: (A - w B)**H y = 0 (for left) +* +* Consider finding the i-th right eigenvector (assume all eigenvalues +* are real). The equation to be solved is: +* n i +* 0 = sum C(j,k) v(k) = sum C(j,k) v(k) for j = i,. . .,1 +* k=j k=j +* +* where C = (A - w B) (The components v(i+1:n) are 0.) +* +* The "rowwise" method is: +* +* (1) v(i) := 1 +* for j = i-1,. . .,1: +* i +* (2) compute s = - sum C(j,k) v(k) and +* k=j+1 +* +* (3) v(j) := s / C(j,j) +* +* Step 2 is sometimes called the "dot product" step, since it is an +* inner product between the j-th row and the portion of the eigenvector +* that has been computed so far. +* +* The "columnwise" method consists basically in doing the sums +* for all the rows in parallel. As each v(j) is computed, the +* contribution of v(j) times the j-th column of C is added to the +* partial sums. Since FORTRAN arrays are stored columnwise, this has +* the advantage that at each step, the elements of C that are accessed +* are adjacent to one another, whereas with the rowwise method, the +* elements accessed at a step are spaced LDS (and LDP) words apart. 
+* +* When finding left eigenvectors, the matrix in question is the +* transpose of the one in storage, so the rowwise method then +* actually accesses columns of A and B at each step, and so is the +* preferred method. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, SAFETY + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, + $ SAFETY = 1.0E+2 ) +* .. +* .. Local Scalars .. + LOGICAL COMPL, COMPR, IL2BY2, ILABAD, ILALL, ILBACK, + $ ILBBAD, ILCOMP, ILCPLX, LSA, LSB + INTEGER I, IBEG, IEIG, IEND, IHWMNY, IINFO, IM, ISIDE, + $ J, JA, JC, JE, JR, JW, NA, NW + REAL ACOEF, ACOEFA, ANORM, ASCALE, BCOEFA, BCOEFI, + $ BCOEFR, BIG, BIGNUM, BNORM, BSCALE, CIM2A, + $ CIM2B, CIMAGA, CIMAGB, CRE2A, CRE2B, CREALA, + $ CREALB, DMIN, SAFMIN, SALFAR, SBETA, SCALE, + $ SMALL, TEMP, TEMP2, TEMP2I, TEMP2R, ULP, XMAX, + $ XSCALE +* .. +* .. Local Arrays .. + REAL BDIAG( 2 ), SUM( 2, 2 ), SUMS( 2, 2 ), + $ SUMP( 2, 2 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SLABAD, SLACPY, SLAG2, SLALN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + IF( LSAME( HOWMNY, 'A' ) ) THEN + IHWMNY = 1 + ILALL = .TRUE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'S' ) ) THEN + IHWMNY = 2 + ILALL = .FALSE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'B' ) ) THEN + IHWMNY = 3 + ILALL = .TRUE. + ILBACK = .TRUE. + ELSE + IHWMNY = -1 + ILALL = .TRUE. + END IF +* + IF( LSAME( SIDE, 'R' ) ) THEN + ISIDE = 1 + COMPL = .FALSE. + COMPR = .TRUE. + ELSE IF( LSAME( SIDE, 'L' ) ) THEN + ISIDE = 2 + COMPL = .TRUE. + COMPR = .FALSE. + ELSE IF( LSAME( SIDE, 'B' ) ) THEN + ISIDE = 3 + COMPL = .TRUE. + COMPR = .TRUE. 
+ ELSE + ISIDE = -1 + END IF +* + INFO = 0 + IF( ISIDE.LT.0 ) THEN + INFO = -1 + ELSE IF( IHWMNY.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDS.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDP.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGEVC', -INFO ) + RETURN + END IF +* +* Count the number of eigenvectors to be computed +* + IF( .NOT.ILALL ) THEN + IM = 0 + ILCPLX = .FALSE. + DO 10 J = 1, N + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 10 + END IF + IF( J.LT.N ) THEN + IF( S( J+1, J ).NE.ZERO ) + $ ILCPLX = .TRUE. + END IF + IF( ILCPLX ) THEN + IF( SELECT( J ) .OR. SELECT( J+1 ) ) + $ IM = IM + 2 + ELSE + IF( SELECT( J ) ) + $ IM = IM + 1 + END IF + 10 CONTINUE + ELSE + IM = N + END IF +* +* Check 2-by-2 diagonal blocks of A, B +* + ILABAD = .FALSE. + ILBBAD = .FALSE. + DO 20 J = 1, N - 1 + IF( S( J+1, J ).NE.ZERO ) THEN + IF( P( J, J ).EQ.ZERO .OR. P( J+1, J+1 ).EQ.ZERO .OR. + $ P( J, J+1 ).NE.ZERO )ILBBAD = .TRUE. + IF( J.LT.N-1 ) THEN + IF( S( J+2, J+1 ).NE.ZERO ) + $ ILABAD = .TRUE. + END IF + END IF + 20 CONTINUE +* + IF( ILABAD ) THEN + INFO = -5 + ELSE IF( ILBBAD ) THEN + INFO = -7 + ELSE IF( COMPL .AND. LDVL.LT.N .OR. LDVL.LT.1 ) THEN + INFO = -10 + ELSE IF( COMPR .AND. LDVR.LT.N .OR. LDVR.LT.1 ) THEN + INFO = -12 + ELSE IF( MM.LT.IM ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGEVC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = IM + IF( N.EQ.0 ) + $ RETURN +* +* Machine Constants +* + SAFMIN = SLAMCH( 'Safe minimum' ) + BIG = ONE / SAFMIN + CALL SLABAD( SAFMIN, BIG ) + ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' ) + SMALL = SAFMIN*N / ULP + BIG = ONE / SMALL + BIGNUM = ONE / ( SAFMIN*N ) +* +* Compute the 1-norm of each column of the strictly upper triangular +* part (i.e., excluding all elements belonging to the diagonal +* blocks) of A and B to check for possible overflow in the +* triangular solver. 
+* + ANORM = ABS( S( 1, 1 ) ) + IF( N.GT.1 ) + $ ANORM = ANORM + ABS( S( 2, 1 ) ) + BNORM = ABS( P( 1, 1 ) ) + WORK( 1 ) = ZERO + WORK( N+1 ) = ZERO +* + DO 50 J = 2, N + TEMP = ZERO + TEMP2 = ZERO + IF( S( J, J-1 ).EQ.ZERO ) THEN + IEND = J - 1 + ELSE + IEND = J - 2 + END IF + DO 30 I = 1, IEND + TEMP = TEMP + ABS( S( I, J ) ) + TEMP2 = TEMP2 + ABS( P( I, J ) ) + 30 CONTINUE + WORK( J ) = TEMP + WORK( N+J ) = TEMP2 + DO 40 I = IEND + 1, MIN( J+1, N ) + TEMP = TEMP + ABS( S( I, J ) ) + TEMP2 = TEMP2 + ABS( P( I, J ) ) + 40 CONTINUE + ANORM = MAX( ANORM, TEMP ) + BNORM = MAX( BNORM, TEMP2 ) + 50 CONTINUE +* + ASCALE = ONE / MAX( ANORM, SAFMIN ) + BSCALE = ONE / MAX( BNORM, SAFMIN ) +* +* Left eigenvectors +* + IF( COMPL ) THEN + IEIG = 0 +* +* Main loop over eigenvalues +* + ILCPLX = .FALSE. + DO 220 JE = 1, N +* +* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or +* (b) this would be the second of a complex pair. +* Check for complex eigenvalue, so as to be sure of which +* entry(-ies) of SELECT to look at. +* + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 220 + END IF + NW = 1 + IF( JE.LT.N ) THEN + IF( S( JE+1, JE ).NE.ZERO ) THEN + ILCPLX = .TRUE. + NW = 2 + END IF + END IF + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE IF( ILCPLX ) THEN + ILCOMP = SELECT( JE ) .OR. SELECT( JE+1 ) + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( .NOT.ILCOMP ) + $ GO TO 220 +* +* Decide if (a) singular pencil, (b) real eigenvalue, or +* (c) complex eigenvalue. +* + IF( .NOT.ILCPLX ) THEN + IF( ABS( S( JE, JE ) ).LE.SAFMIN .AND. 
+ $ ABS( P( JE, JE ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- return unit eigenvector +* + IEIG = IEIG + 1 + DO 60 JR = 1, N + VL( JR, IEIG ) = ZERO + 60 CONTINUE + VL( IEIG, IEIG ) = ONE + GO TO 220 + END IF + END IF +* +* Clear vector +* + DO 70 JR = 1, NW*N + WORK( 2*N+JR ) = ZERO + 70 CONTINUE +* T +* Compute coefficients in ( a A - b B ) y = 0 +* a is ACOEF +* b is BCOEFR + i*BCOEFI +* + IF( .NOT.ILCPLX ) THEN +* +* Real eigenvalue +* + TEMP = ONE / MAX( ABS( S( JE, JE ) )*ASCALE, + $ ABS( P( JE, JE ) )*BSCALE, SAFMIN ) + SALFAR = ( TEMP*S( JE, JE ) )*ASCALE + SBETA = ( TEMP*P( JE, JE ) )*BSCALE + ACOEF = SBETA*ASCALE + BCOEFR = SALFAR*BSCALE + BCOEFI = ZERO +* +* Scale to avoid underflow +* + SCALE = ONE + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEF ).LT.SMALL + LSB = ABS( SALFAR ).GE.SAFMIN .AND. ABS( BCOEFR ).LT. + $ SMALL + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS( SALFAR ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEF ), + $ ABS( BCOEFR ) ) ) ) + IF( LSA ) THEN + ACOEF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEF = SCALE*ACOEF + END IF + IF( LSB ) THEN + BCOEFR = BSCALE*( SCALE*SALFAR ) + ELSE + BCOEFR = SCALE*BCOEFR + END IF + END IF + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) +* +* First component is 1 +* + WORK( 2*N+JE ) = ONE + XMAX = ONE + ELSE +* +* Complex eigenvalue +* + CALL SLAG2( S( JE, JE ), LDS, P( JE, JE ), LDP, + $ SAFMIN*SAFETY, ACOEF, TEMP, BCOEFR, TEMP2, + $ BCOEFI ) + BCOEFI = -BCOEFI + IF( BCOEFI.EQ.ZERO ) THEN + INFO = JE + RETURN + END IF +* +* Scale to avoid over/underflow +* + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + SCALE = ONE + IF( ACOEFA*ULP.LT.SAFMIN .AND. ACOEFA.GE.SAFMIN ) + $ SCALE = ( SAFMIN / ULP ) / ACOEFA + IF( BCOEFA*ULP.LT.SAFMIN .AND. 
BCOEFA.GE.SAFMIN ) + $ SCALE = MAX( SCALE, ( SAFMIN / ULP ) / BCOEFA ) + IF( SAFMIN*ACOEFA.GT.ASCALE ) + $ SCALE = ASCALE / ( SAFMIN*ACOEFA ) + IF( SAFMIN*BCOEFA.GT.BSCALE ) + $ SCALE = MIN( SCALE, BSCALE / ( SAFMIN*BCOEFA ) ) + IF( SCALE.NE.ONE ) THEN + ACOEF = SCALE*ACOEF + ACOEFA = ABS( ACOEF ) + BCOEFR = SCALE*BCOEFR + BCOEFI = SCALE*BCOEFI + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + END IF +* +* Compute first two components of eigenvector +* + TEMP = ACOEF*S( JE+1, JE ) + TEMP2R = ACOEF*S( JE, JE ) - BCOEFR*P( JE, JE ) + TEMP2I = -BCOEFI*P( JE, JE ) + IF( ABS( TEMP ).GT.ABS( TEMP2R )+ABS( TEMP2I ) ) THEN + WORK( 2*N+JE ) = ONE + WORK( 3*N+JE ) = ZERO + WORK( 2*N+JE+1 ) = -TEMP2R / TEMP + WORK( 3*N+JE+1 ) = -TEMP2I / TEMP + ELSE + WORK( 2*N+JE+1 ) = ONE + WORK( 3*N+JE+1 ) = ZERO + TEMP = ACOEF*S( JE, JE+1 ) + WORK( 2*N+JE ) = ( BCOEFR*P( JE+1, JE+1 )-ACOEF* + $ S( JE+1, JE+1 ) ) / TEMP + WORK( 3*N+JE ) = BCOEFI*P( JE+1, JE+1 ) / TEMP + END IF + XMAX = MAX( ABS( WORK( 2*N+JE ) )+ABS( WORK( 3*N+JE ) ), + $ ABS( WORK( 2*N+JE+1 ) )+ABS( WORK( 3*N+JE+1 ) ) ) + END IF +* + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* T +* Triangular solve of (a A - b B) y = 0 +* +* T +* (rowwise in (a A - b B) , or columnwise in (a A - b B) ) +* + IL2BY2 = .FALSE. +* + DO 160 J = JE + NW, N + IF( IL2BY2 ) THEN + IL2BY2 = .FALSE. + GO TO 160 + END IF +* + NA = 1 + BDIAG( 1 ) = P( J, J ) + IF( J.LT.N ) THEN + IF( S( J+1, J ).NE.ZERO ) THEN + IL2BY2 = .TRUE. 
+ BDIAG( 2 ) = P( J+1, J+1 ) + NA = 2 + END IF + END IF +* +* Check whether scaling is necessary for dot products +* + XSCALE = ONE / MAX( ONE, XMAX ) + TEMP = MAX( WORK( J ), WORK( N+J ), + $ ACOEFA*WORK( J )+BCOEFA*WORK( N+J ) ) + IF( IL2BY2 ) + $ TEMP = MAX( TEMP, WORK( J+1 ), WORK( N+J+1 ), + $ ACOEFA*WORK( J+1 )+BCOEFA*WORK( N+J+1 ) ) + IF( TEMP.GT.BIGNUM*XSCALE ) THEN + DO 90 JW = 0, NW - 1 + DO 80 JR = JE, J - 1 + WORK( ( JW+2 )*N+JR ) = XSCALE* + $ WORK( ( JW+2 )*N+JR ) + 80 CONTINUE + 90 CONTINUE + XMAX = XMAX*XSCALE + END IF +* +* Compute dot products +* +* j-1 +* SUM = sum conjg( a*S(k,j) - b*P(k,j) )*x(k) +* k=je +* +* To reduce the op count, this is done as +* +* _ j-1 _ j-1 +* a*conjg( sum S(k,j)*x(k) ) - b*conjg( sum P(k,j)*x(k) ) +* k=je k=je +* +* which may cause underflow problems if A or B are close +* to underflow. (E.g., less than SMALL.) +* +* +* A series of compiler directives to defeat vectorization +* for the next loop +* +*$PL$ CMCHAR=' ' +CDIR$ NEXTSCALAR +C$DIR SCALAR +CDIR$ NEXT SCALAR +CVD$L NOVECTOR +CDEC$ NOVECTOR +CVD$ NOVECTOR +*VDIR NOVECTOR +*VOCL LOOP,SCALAR +CIBM PREFER SCALAR +*$PL$ CMCHAR='*' +* + DO 120 JW = 1, NW +* +*$PL$ CMCHAR=' ' +CDIR$ NEXTSCALAR +C$DIR SCALAR +CDIR$ NEXT SCALAR +CVD$L NOVECTOR +CDEC$ NOVECTOR +CVD$ NOVECTOR +*VDIR NOVECTOR +*VOCL LOOP,SCALAR +CIBM PREFER SCALAR +*$PL$ CMCHAR='*' +* + DO 110 JA = 1, NA + SUMS( JA, JW ) = ZERO + SUMP( JA, JW ) = ZERO +* + DO 100 JR = JE, J - 1 + SUMS( JA, JW ) = SUMS( JA, JW ) + + $ S( JR, J+JA-1 )* + $ WORK( ( JW+1 )*N+JR ) + SUMP( JA, JW ) = SUMP( JA, JW ) + + $ P( JR, J+JA-1 )* + $ WORK( ( JW+1 )*N+JR ) + 100 CONTINUE + 110 CONTINUE + 120 CONTINUE +* +*$PL$ CMCHAR=' ' +CDIR$ NEXTSCALAR +C$DIR SCALAR +CDIR$ NEXT SCALAR +CVD$L NOVECTOR +CDEC$ NOVECTOR +CVD$ NOVECTOR +*VDIR NOVECTOR +*VOCL LOOP,SCALAR +CIBM PREFER SCALAR +*$PL$ CMCHAR='*' +* + DO 130 JA = 1, NA + IF( ILCPLX ) THEN + SUM( JA, 1 ) = -ACOEF*SUMS( JA, 1 ) + + $ BCOEFR*SUMP( JA, 1 ) - + $ BCOEFI*SUMP( JA, 2 
) + SUM( JA, 2 ) = -ACOEF*SUMS( JA, 2 ) + + $ BCOEFR*SUMP( JA, 2 ) + + $ BCOEFI*SUMP( JA, 1 ) + ELSE + SUM( JA, 1 ) = -ACOEF*SUMS( JA, 1 ) + + $ BCOEFR*SUMP( JA, 1 ) + END IF + 130 CONTINUE +* +* T +* Solve ( a A - b B ) y = SUM(,) +* with scaling and perturbation of the denominator +* + CALL SLALN2( .TRUE., NA, NW, DMIN, ACOEF, S( J, J ), LDS, + $ BDIAG( 1 ), BDIAG( 2 ), SUM, 2, BCOEFR, + $ BCOEFI, WORK( 2*N+J ), N, SCALE, TEMP, + $ IINFO ) + IF( SCALE.LT.ONE ) THEN + DO 150 JW = 0, NW - 1 + DO 140 JR = JE, J - 1 + WORK( ( JW+2 )*N+JR ) = SCALE* + $ WORK( ( JW+2 )*N+JR ) + 140 CONTINUE + 150 CONTINUE + XMAX = SCALE*XMAX + END IF + XMAX = MAX( XMAX, TEMP ) + 160 CONTINUE +* +* Copy eigenvector to VL, back transforming if +* HOWMNY='B'. +* + IEIG = IEIG + 1 + IF( ILBACK ) THEN + DO 170 JW = 0, NW - 1 + CALL SGEMV( 'N', N, N+1-JE, ONE, VL( 1, JE ), LDVL, + $ WORK( ( JW+2 )*N+JE ), 1, ZERO, + $ WORK( ( JW+4 )*N+1 ), 1 ) + 170 CONTINUE + CALL SLACPY( ' ', N, NW, WORK( 4*N+1 ), N, VL( 1, JE ), + $ LDVL ) + IBEG = 1 + ELSE + CALL SLACPY( ' ', N, NW, WORK( 2*N+1 ), N, VL( 1, IEIG ), + $ LDVL ) + IBEG = JE + END IF +* +* Scale eigenvector +* + XMAX = ZERO + IF( ILCPLX ) THEN + DO 180 J = IBEG, N + XMAX = MAX( XMAX, ABS( VL( J, IEIG ) )+ + $ ABS( VL( J, IEIG+1 ) ) ) + 180 CONTINUE + ELSE + DO 190 J = IBEG, N + XMAX = MAX( XMAX, ABS( VL( J, IEIG ) ) ) + 190 CONTINUE + END IF +* + IF( XMAX.GT.SAFMIN ) THEN + XSCALE = ONE / XMAX +* + DO 210 JW = 0, NW - 1 + DO 200 JR = IBEG, N + VL( JR, IEIG+JW ) = XSCALE*VL( JR, IEIG+JW ) + 200 CONTINUE + 210 CONTINUE + END IF + IEIG = IEIG + NW - 1 +* + 220 CONTINUE + END IF +* +* Right eigenvectors +* + IF( COMPR ) THEN + IEIG = IM + 1 +* +* Main loop over eigenvalues +* + ILCPLX = .FALSE. + DO 500 JE = N, 1, -1 +* +* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or +* (b) this would be the second of a complex pair. 
+* Check for complex eigenvalue, so as to be sure of which +* entry(-ies) of SELECT to look at -- if complex, SELECT(JE) +* or SELECT(JE-1). +* If this is a complex pair, the 2-by-2 diagonal block +* corresponding to the eigenvalue is in rows/columns JE-1:JE +* + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 500 + END IF + NW = 1 + IF( JE.GT.1 ) THEN + IF( S( JE, JE-1 ).NE.ZERO ) THEN + ILCPLX = .TRUE. + NW = 2 + END IF + END IF + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE IF( ILCPLX ) THEN + ILCOMP = SELECT( JE ) .OR. SELECT( JE-1 ) + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( .NOT.ILCOMP ) + $ GO TO 500 +* +* Decide if (a) singular pencil, (b) real eigenvalue, or +* (c) complex eigenvalue. +* + IF( .NOT.ILCPLX ) THEN + IF( ABS( S( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( P( JE, JE ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- unit eigenvector +* + IEIG = IEIG - 1 + DO 230 JR = 1, N + VR( JR, IEIG ) = ZERO + 230 CONTINUE + VR( IEIG, IEIG ) = ONE + GO TO 500 + END IF + END IF +* +* Clear vector +* + DO 250 JW = 0, NW - 1 + DO 240 JR = 1, N + WORK( ( JW+2 )*N+JR ) = ZERO + 240 CONTINUE + 250 CONTINUE +* +* Compute coefficients in ( a A - b B ) x = 0 +* a is ACOEF +* b is BCOEFR + i*BCOEFI +* + IF( .NOT.ILCPLX ) THEN +* +* Real eigenvalue +* + TEMP = ONE / MAX( ABS( S( JE, JE ) )*ASCALE, + $ ABS( P( JE, JE ) )*BSCALE, SAFMIN ) + SALFAR = ( TEMP*S( JE, JE ) )*ASCALE + SBETA = ( TEMP*P( JE, JE ) )*BSCALE + ACOEF = SBETA*ASCALE + BCOEFR = SALFAR*BSCALE + BCOEFI = ZERO +* +* Scale to avoid underflow +* + SCALE = ONE + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEF ).LT.SMALL + LSB = ABS( SALFAR ).GE.SAFMIN .AND. ABS( BCOEFR ).LT. + $ SMALL + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS( SALFAR ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. 
LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEF ), + $ ABS( BCOEFR ) ) ) ) + IF( LSA ) THEN + ACOEF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEF = SCALE*ACOEF + END IF + IF( LSB ) THEN + BCOEFR = BSCALE*( SCALE*SALFAR ) + ELSE + BCOEFR = SCALE*BCOEFR + END IF + END IF + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) +* +* First component is 1 +* + WORK( 2*N+JE ) = ONE + XMAX = ONE +* +* Compute contribution from column JE of A and B to sum +* (See "Further Details", above.) +* + DO 260 JR = 1, JE - 1 + WORK( 2*N+JR ) = BCOEFR*P( JR, JE ) - + $ ACOEF*S( JR, JE ) + 260 CONTINUE + ELSE +* +* Complex eigenvalue +* + CALL SLAG2( S( JE-1, JE-1 ), LDS, P( JE-1, JE-1 ), LDP, + $ SAFMIN*SAFETY, ACOEF, TEMP, BCOEFR, TEMP2, + $ BCOEFI ) + IF( BCOEFI.EQ.ZERO ) THEN + INFO = JE - 1 + RETURN + END IF +* +* Scale to avoid over/underflow +* + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + SCALE = ONE + IF( ACOEFA*ULP.LT.SAFMIN .AND. ACOEFA.GE.SAFMIN ) + $ SCALE = ( SAFMIN / ULP ) / ACOEFA + IF( BCOEFA*ULP.LT.SAFMIN .AND. 
BCOEFA.GE.SAFMIN ) + $ SCALE = MAX( SCALE, ( SAFMIN / ULP ) / BCOEFA ) + IF( SAFMIN*ACOEFA.GT.ASCALE ) + $ SCALE = ASCALE / ( SAFMIN*ACOEFA ) + IF( SAFMIN*BCOEFA.GT.BSCALE ) + $ SCALE = MIN( SCALE, BSCALE / ( SAFMIN*BCOEFA ) ) + IF( SCALE.NE.ONE ) THEN + ACOEF = SCALE*ACOEF + ACOEFA = ABS( ACOEF ) + BCOEFR = SCALE*BCOEFR + BCOEFI = SCALE*BCOEFI + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + END IF +* +* Compute first two components of eigenvector +* and contribution to sums +* + TEMP = ACOEF*S( JE, JE-1 ) + TEMP2R = ACOEF*S( JE, JE ) - BCOEFR*P( JE, JE ) + TEMP2I = -BCOEFI*P( JE, JE ) + IF( ABS( TEMP ).GE.ABS( TEMP2R )+ABS( TEMP2I ) ) THEN + WORK( 2*N+JE ) = ONE + WORK( 3*N+JE ) = ZERO + WORK( 2*N+JE-1 ) = -TEMP2R / TEMP + WORK( 3*N+JE-1 ) = -TEMP2I / TEMP + ELSE + WORK( 2*N+JE-1 ) = ONE + WORK( 3*N+JE-1 ) = ZERO + TEMP = ACOEF*S( JE-1, JE ) + WORK( 2*N+JE ) = ( BCOEFR*P( JE-1, JE-1 )-ACOEF* + $ S( JE-1, JE-1 ) ) / TEMP + WORK( 3*N+JE ) = BCOEFI*P( JE-1, JE-1 ) / TEMP + END IF +* + XMAX = MAX( ABS( WORK( 2*N+JE ) )+ABS( WORK( 3*N+JE ) ), + $ ABS( WORK( 2*N+JE-1 ) )+ABS( WORK( 3*N+JE-1 ) ) ) +* +* Compute contribution from columns JE and JE-1 +* of A and B to the sums. +* + CREALA = ACOEF*WORK( 2*N+JE-1 ) + CIMAGA = ACOEF*WORK( 3*N+JE-1 ) + CREALB = BCOEFR*WORK( 2*N+JE-1 ) - + $ BCOEFI*WORK( 3*N+JE-1 ) + CIMAGB = BCOEFI*WORK( 2*N+JE-1 ) + + $ BCOEFR*WORK( 3*N+JE-1 ) + CRE2A = ACOEF*WORK( 2*N+JE ) + CIM2A = ACOEF*WORK( 3*N+JE ) + CRE2B = BCOEFR*WORK( 2*N+JE ) - BCOEFI*WORK( 3*N+JE ) + CIM2B = BCOEFI*WORK( 2*N+JE ) + BCOEFR*WORK( 3*N+JE ) + DO 270 JR = 1, JE - 2 + WORK( 2*N+JR ) = -CREALA*S( JR, JE-1 ) + + $ CREALB*P( JR, JE-1 ) - + $ CRE2A*S( JR, JE ) + CRE2B*P( JR, JE ) + WORK( 3*N+JR ) = -CIMAGA*S( JR, JE-1 ) + + $ CIMAGB*P( JR, JE-1 ) - + $ CIM2A*S( JR, JE ) + CIM2B*P( JR, JE ) + 270 CONTINUE + END IF +* + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* Columnwise triangular solve of (a A - b B) x = 0 +* + IL2BY2 = .FALSE. 
+ DO 370 J = JE - NW, 1, -1 +* +* If a 2-by-2 block, is in position j-1:j, wait until +* next iteration to process it (when it will be j:j+1) +* + IF( .NOT.IL2BY2 .AND. J.GT.1 ) THEN + IF( S( J, J-1 ).NE.ZERO ) THEN + IL2BY2 = .TRUE. + GO TO 370 + END IF + END IF + BDIAG( 1 ) = P( J, J ) + IF( IL2BY2 ) THEN + NA = 2 + BDIAG( 2 ) = P( J+1, J+1 ) + ELSE + NA = 1 + END IF +* +* Compute x(j) (and x(j+1), if 2-by-2 block) +* + CALL SLALN2( .FALSE., NA, NW, DMIN, ACOEF, S( J, J ), + $ LDS, BDIAG( 1 ), BDIAG( 2 ), WORK( 2*N+J ), + $ N, BCOEFR, BCOEFI, SUM, 2, SCALE, TEMP, + $ IINFO ) + IF( SCALE.LT.ONE ) THEN +* + DO 290 JW = 0, NW - 1 + DO 280 JR = 1, JE + WORK( ( JW+2 )*N+JR ) = SCALE* + $ WORK( ( JW+2 )*N+JR ) + 280 CONTINUE + 290 CONTINUE + END IF + XMAX = MAX( SCALE*XMAX, TEMP ) +* + DO 310 JW = 1, NW + DO 300 JA = 1, NA + WORK( ( JW+1 )*N+J+JA-1 ) = SUM( JA, JW ) + 300 CONTINUE + 310 CONTINUE +* +* w = w + x(j)*(a S(*,j) - b P(*,j) ) with scaling +* + IF( J.GT.1 ) THEN +* +* Check whether scaling is necessary for sum. +* + XSCALE = ONE / MAX( ONE, XMAX ) + TEMP = ACOEFA*WORK( J ) + BCOEFA*WORK( N+J ) + IF( IL2BY2 ) + $ TEMP = MAX( TEMP, ACOEFA*WORK( J+1 )+BCOEFA* + $ WORK( N+J+1 ) ) + TEMP = MAX( TEMP, ACOEFA, BCOEFA ) + IF( TEMP.GT.BIGNUM*XSCALE ) THEN +* + DO 330 JW = 0, NW - 1 + DO 320 JR = 1, JE + WORK( ( JW+2 )*N+JR ) = XSCALE* + $ WORK( ( JW+2 )*N+JR ) + 320 CONTINUE + 330 CONTINUE + XMAX = XMAX*XSCALE + END IF +* +* Compute the contributions of the off-diagonals of +* column j (and j+1, if 2-by-2 block) of A and B to the +* sums. 
+* +* + DO 360 JA = 1, NA + IF( ILCPLX ) THEN + CREALA = ACOEF*WORK( 2*N+J+JA-1 ) + CIMAGA = ACOEF*WORK( 3*N+J+JA-1 ) + CREALB = BCOEFR*WORK( 2*N+J+JA-1 ) - + $ BCOEFI*WORK( 3*N+J+JA-1 ) + CIMAGB = BCOEFI*WORK( 2*N+J+JA-1 ) + + $ BCOEFR*WORK( 3*N+J+JA-1 ) + DO 340 JR = 1, J - 1 + WORK( 2*N+JR ) = WORK( 2*N+JR ) - + $ CREALA*S( JR, J+JA-1 ) + + $ CREALB*P( JR, J+JA-1 ) + WORK( 3*N+JR ) = WORK( 3*N+JR ) - + $ CIMAGA*S( JR, J+JA-1 ) + + $ CIMAGB*P( JR, J+JA-1 ) + 340 CONTINUE + ELSE + CREALA = ACOEF*WORK( 2*N+J+JA-1 ) + CREALB = BCOEFR*WORK( 2*N+J+JA-1 ) + DO 350 JR = 1, J - 1 + WORK( 2*N+JR ) = WORK( 2*N+JR ) - + $ CREALA*S( JR, J+JA-1 ) + + $ CREALB*P( JR, J+JA-1 ) + 350 CONTINUE + END IF + 360 CONTINUE + END IF +* + IL2BY2 = .FALSE. + 370 CONTINUE +* +* Copy eigenvector to VR, back transforming if +* HOWMNY='B'. +* + IEIG = IEIG - NW + IF( ILBACK ) THEN +* + DO 410 JW = 0, NW - 1 + DO 380 JR = 1, N + WORK( ( JW+4 )*N+JR ) = WORK( ( JW+2 )*N+1 )* + $ VR( JR, 1 ) + 380 CONTINUE +* +* A series of compiler directives to defeat +* vectorization for the next loop +* +* + DO 400 JC = 2, JE + DO 390 JR = 1, N + WORK( ( JW+4 )*N+JR ) = WORK( ( JW+4 )*N+JR ) + + $ WORK( ( JW+2 )*N+JC )*VR( JR, JC ) + 390 CONTINUE + 400 CONTINUE + 410 CONTINUE +* + DO 430 JW = 0, NW - 1 + DO 420 JR = 1, N + VR( JR, IEIG+JW ) = WORK( ( JW+4 )*N+JR ) + 420 CONTINUE + 430 CONTINUE +* + IEND = N + ELSE + DO 450 JW = 0, NW - 1 + DO 440 JR = 1, N + VR( JR, IEIG+JW ) = WORK( ( JW+2 )*N+JR ) + 440 CONTINUE + 450 CONTINUE +* + IEND = JE + END IF +* +* Scale eigenvector +* + XMAX = ZERO + IF( ILCPLX ) THEN + DO 460 J = 1, IEND + XMAX = MAX( XMAX, ABS( VR( J, IEIG ) )+ + $ ABS( VR( J, IEIG+1 ) ) ) + 460 CONTINUE + ELSE + DO 470 J = 1, IEND + XMAX = MAX( XMAX, ABS( VR( J, IEIG ) ) ) + 470 CONTINUE + END IF +* + IF( XMAX.GT.SAFMIN ) THEN + XSCALE = ONE / XMAX + DO 490 JW = 0, NW - 1 + DO 480 JR = 1, IEND + VR( JR, IEIG+JW ) = XSCALE*VR( JR, IEIG+JW ) + 480 CONTINUE + 490 CONTINUE + END IF + 500 CONTINUE + 
END IF +* + RETURN +* +* End of STGEVC +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/strcon.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/strcon.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,197 @@ + SUBROUTINE STRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* Modified to call SLACN2 in place of SLACON, 7 Feb 03, SJH. +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, LDA, N + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* STRCON estimates the reciprocal of the condition number of a +* triangular matrix A, in either the 1-norm or the infinity-norm. +* +* The norm of A is computed and an estimate is obtained for +* norm(inv(A)), then the reciprocal of the condition number is +* computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. 
If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, ANORM, SCALE, SMLNUM, XNORM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH, SLANTR + EXTERNAL LSAME, ISAMAX, SLAMCH, SLANTR +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SLATRS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. 
+* + ANORM = SLANTR( NORM, UPLO, DIAG, N, N, A, LDA, WORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL SLATRS( UPLO, 'No transpose', DIAG, NORMIN, N, A, + $ LDA, WORK, SCALE, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(A'). +* + CALL SLATRS( UPLO, 'Transpose', DIAG, NORMIN, N, A, LDA, + $ WORK, SCALE, WORK( 2*N+1 ), INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + XNORM = ABS( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of STRCON +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/strevc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/strevc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,981 @@ + SUBROUTINE STREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, MM, M, WORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDT, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* STREVC computes some or all of the right and/or left eigenvectors of +* a real upper quasi-triangular matrix T. 
+* Matrices of this type are produced by the Schur factorization of +* a real general matrix: A = Q*T*Q**T, as computed by SHSEQR. +* +* The right eigenvector x and the left eigenvector y of T corresponding +* to an eigenvalue w are defined by: +* +* T*x = w*x, (y**H)*T = w*(y**H) +* +* where y**H denotes the conjugate transpose of y. +* The eigenvalues are not input to this routine, but are read directly +* from the diagonal blocks of T. +* +* This routine returns the matrices X and/or Y of right and left +* eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an +* input matrix. If Q is the orthogonal factor that reduces a matrix +* A to Schur form T, then Q*X and Q*Y are the matrices of right and +* left eigenvectors of A. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute all right and/or left eigenvectors; +* = 'B': compute all right and/or left eigenvectors, +* backtransformed by the matrices in VR and/or VL; +* = 'S': compute selected right and/or left eigenvectors, +* as indicated by the logical array SELECT. +* +* SELECT (input/output) LOGICAL array, dimension (N) +* If HOWMNY = 'S', SELECT specifies the eigenvectors to be +* computed. +* If w(j) is a real eigenvalue, the corresponding real +* eigenvector is computed if SELECT(j) is .TRUE.. +* If w(j) and w(j+1) are the real and imaginary parts of a +* complex eigenvalue, the corresponding complex eigenvector is +* computed if either SELECT(j) or SELECT(j+1) is .TRUE., and +* on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is set to +* .FALSE.. +* Not referenced if HOWMNY = 'A' or 'B'. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input) REAL array, dimension (LDT,N) +* The upper quasi-triangular matrix T in Schur canonical form. 
+* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* VL (input/output) REAL array, dimension (LDVL,MM) +* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +* contain an N-by-N matrix Q (usually the orthogonal matrix Q +* of Schur vectors returned by SHSEQR). +* On exit, if SIDE = 'L' or 'B', VL contains: +* if HOWMNY = 'A', the matrix Y of left eigenvectors of T; +* if HOWMNY = 'B', the matrix Q*Y; +* if HOWMNY = 'S', the left eigenvectors of T specified by +* SELECT, stored consecutively in the columns +* of VL, in the same order as their +* eigenvalues. +* A complex eigenvector corresponding to a complex eigenvalue +* is stored in two consecutive columns, the first holding the +* real part, and the second the imaginary part. +* Not referenced if SIDE = 'R'. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1, and if +* SIDE = 'L' or 'B', LDVL >= N. +* +* VR (input/output) REAL array, dimension (LDVR,MM) +* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +* contain an N-by-N matrix Q (usually the orthogonal matrix Q +* of Schur vectors returned by SHSEQR). +* On exit, if SIDE = 'R' or 'B', VR contains: +* if HOWMNY = 'A', the matrix X of right eigenvectors of T; +* if HOWMNY = 'B', the matrix Q*X; +* if HOWMNY = 'S', the right eigenvectors of T specified by +* SELECT, stored consecutively in the columns +* of VR, in the same order as their +* eigenvalues. +* A complex eigenvector corresponding to a complex eigenvalue +* is stored in two consecutive columns, the first holding the +* real part and the second the imaginary part. +* Not referenced if SIDE = 'L'. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1, and if +* SIDE = 'R' or 'B', LDVR >= N. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR actually +* used to store the eigenvectors. 
+* If HOWMNY = 'A' or 'B', M is set to N. +* Each selected real eigenvector occupies one column and each +* selected complex eigenvector occupies two columns. +* +* WORK (workspace) REAL array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The algorithm used in this program is basically backward (forward) +* substitution, with scaling to make the the code robust against +* possible overflow. +* +* Each eigenvector is normalized so that the element of largest +* magnitude has magnitude 1; here the magnitude of a complex number +* (x,y) is taken to be |x| + |y|. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLV, BOTHV, LEFTV, OVER, PAIR, RIGHTV, SOMEV + INTEGER I, IERR, II, IP, IS, J, J1, J2, JNXT, K, KI, N2 + REAL BETA, BIGNUM, EMAX, OVFL, REC, REMAX, SCALE, + $ SMIN, SMLNUM, ULP, UNFL, VCRIT, VMAX, WI, WR, + $ XNORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SLABAD, SLALN2, SSCAL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Local Arrays .. + REAL X( 2, 2 ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + ALLV = LSAME( HOWMNY, 'A' ) + OVER = LSAME( HOWMNY, 'B' ) + SOMEV = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. 
( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE +* +* Set M to the number of columns required to store the selected +* eigenvectors, standardize the array SELECT if necessary, and +* test MM. +* + IF( SOMEV ) THEN + M = 0 + PAIR = .FALSE. + DO 10 J = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + SELECT( J ) = .FALSE. + ELSE + IF( J.LT.N ) THEN + IF( T( J+1, J ).EQ.ZERO ) THEN + IF( SELECT( J ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( J ) .OR. SELECT( J+1 ) ) THEN + SELECT( J ) = .TRUE. + M = M + 2 + END IF + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -11 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STREVC', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set the constants to control overflow. +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) + BIGNUM = ( ONE-ULP ) / SMLNUM +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + WORK( 1 ) = ZERO + DO 30 J = 2, N + WORK( J ) = ZERO + DO 20 I = 1, J - 1 + WORK( J ) = WORK( J ) + ABS( T( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* +* Index IP is used to specify the real or complex eigenvalue: +* IP = 0, real eigenvalue, +* 1, first of conjugate complex pair: (wr,wi) +* -1, second of conjugate complex pair: (wr,wi) +* + N2 = 2*N +* + IF( RIGHTV ) THEN +* +* Compute right eigenvectors. 
+* + IP = 0 + IS = M + DO 140 KI = N, 1, -1 +* + IF( IP.EQ.1 ) + $ GO TO 130 + IF( KI.EQ.1 ) + $ GO TO 40 + IF( T( KI, KI-1 ).EQ.ZERO ) + $ GO TO 40 + IP = -1 +* + 40 CONTINUE + IF( SOMEV ) THEN + IF( IP.EQ.0 ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 130 + ELSE + IF( .NOT.SELECT( KI-1 ) ) + $ GO TO 130 + END IF + END IF +* +* Compute the KI-th eigenvalue (WR,WI). +* + WR = T( KI, KI ) + WI = ZERO + IF( IP.NE.0 ) + $ WI = SQRT( ABS( T( KI, KI-1 ) ) )* + $ SQRT( ABS( T( KI-1, KI ) ) ) + SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM ) +* + IF( IP.EQ.0 ) THEN +* +* Real right eigenvector +* + WORK( KI+N ) = ONE +* +* Form right-hand side +* + DO 50 K = 1, KI - 1 + WORK( K+N ) = -T( K, KI ) + 50 CONTINUE +* +* Solve the upper quasi-triangular system: +* (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. +* + JNXT = KI - 1 + DO 60 J = KI - 1, 1, -1 + IF( J.GT.JNXT ) + $ GO TO 60 + J1 = J + J2 = J + JNXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* + CALL SLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale X(1,1) to avoid overflow when updating +* the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + IF( WORK( J ).GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) +* +* Update right-hand side +* + CALL SAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) +* + ELSE +* +* 2-by-2 diagonal block +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, + $ T( J-1, J-1 ), LDT, ONE, ONE, + $ WORK( J-1+N ), N, WR, ZERO, X, 2, + $ SCALE, XNORM, IERR ) +* +* Scale X(1,1) and X(2,1) to avoid overflow when +* updating the right-hand side. 
+* + IF( XNORM.GT.ONE ) THEN + BETA = MAX( WORK( J-1 ), WORK( J ) ) + IF( BETA.GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + X( 2, 1 ) = X( 2, 1 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 ) + WORK( J-1+N ) = X( 1, 1 ) + WORK( J+N ) = X( 2, 1 ) +* +* Update right-hand side +* + CALL SAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1, + $ WORK( 1+N ), 1 ) + CALL SAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + END IF + 60 CONTINUE +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL SCOPY( KI, WORK( 1+N ), 1, VR( 1, IS ), 1 ) +* + II = ISAMAX( KI, VR( 1, IS ), 1 ) + REMAX = ONE / ABS( VR( II, IS ) ) + CALL SSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 70 K = KI + 1, N + VR( K, IS ) = ZERO + 70 CONTINUE + ELSE + IF( KI.GT.1 ) + $ CALL SGEMV( 'N', N, KI-1, ONE, VR, LDVR, + $ WORK( 1+N ), 1, WORK( KI+N ), + $ VR( 1, KI ), 1 ) +* + II = ISAMAX( N, VR( 1, KI ), 1 ) + REMAX = ONE / ABS( VR( II, KI ) ) + CALL SSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF +* + ELSE +* +* Complex right eigenvector. +* +* Initial solve +* [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. 
+* [ (T(KI,KI-1) T(KI,KI) ) ] +* + IF( ABS( T( KI-1, KI ) ).GE.ABS( T( KI, KI-1 ) ) ) THEN + WORK( KI-1+N ) = ONE + WORK( KI+N2 ) = WI / T( KI-1, KI ) + ELSE + WORK( KI-1+N ) = -WI / T( KI, KI-1 ) + WORK( KI+N2 ) = ONE + END IF + WORK( KI+N ) = ZERO + WORK( KI-1+N2 ) = ZERO +* +* Form right-hand side +* + DO 80 K = 1, KI - 2 + WORK( K+N ) = -WORK( KI-1+N )*T( K, KI-1 ) + WORK( K+N2 ) = -WORK( KI+N2 )*T( K, KI ) + 80 CONTINUE +* +* Solve upper quasi-triangular system: +* (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) +* + JNXT = KI - 2 + DO 90 J = KI - 2, 1, -1 + IF( J.GT.JNXT ) + $ GO TO 90 + J1 = J + J2 = J + JNXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* + CALL SLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, WI, + $ X, 2, SCALE, XNORM, IERR ) +* +* Scale X(1,1) and X(1,2) to avoid overflow when +* updating the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + IF( WORK( J ).GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + X( 1, 2 ) = X( 1, 2 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 ) + CALL SSCAL( KI, SCALE, WORK( 1+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) +* +* Update the right-hand side +* + CALL SAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + CALL SAXPY( J-1, -X( 1, 2 ), T( 1, J ), 1, + $ WORK( 1+N2 ), 1 ) +* + ELSE +* +* 2-by-2 diagonal block +* + CALL SLALN2( .FALSE., 2, 2, SMIN, ONE, + $ T( J-1, J-1 ), LDT, ONE, ONE, + $ WORK( J-1+N ), N, WR, WI, X, 2, SCALE, + $ XNORM, IERR ) +* +* Scale X to avoid overflow when updating +* the right-hand side. 
+* + IF( XNORM.GT.ONE ) THEN + BETA = MAX( WORK( J-1 ), WORK( J ) ) + IF( BETA.GT.BIGNUM / XNORM ) THEN + REC = ONE / XNORM + X( 1, 1 ) = X( 1, 1 )*REC + X( 1, 2 ) = X( 1, 2 )*REC + X( 2, 1 ) = X( 2, 1 )*REC + X( 2, 2 ) = X( 2, 2 )*REC + SCALE = SCALE*REC + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 ) + CALL SSCAL( KI, SCALE, WORK( 1+N2 ), 1 ) + END IF + WORK( J-1+N ) = X( 1, 1 ) + WORK( J+N ) = X( 2, 1 ) + WORK( J-1+N2 ) = X( 1, 2 ) + WORK( J+N2 ) = X( 2, 2 ) +* +* Update the right-hand side +* + CALL SAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1, + $ WORK( 1+N ), 1 ) + CALL SAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + CALL SAXPY( J-2, -X( 1, 2 ), T( 1, J-1 ), 1, + $ WORK( 1+N2 ), 1 ) + CALL SAXPY( J-2, -X( 2, 2 ), T( 1, J ), 1, + $ WORK( 1+N2 ), 1 ) + END IF + 90 CONTINUE +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL SCOPY( KI, WORK( 1+N ), 1, VR( 1, IS-1 ), 1 ) + CALL SCOPY( KI, WORK( 1+N2 ), 1, VR( 1, IS ), 1 ) +* + EMAX = ZERO + DO 100 K = 1, KI + EMAX = MAX( EMAX, ABS( VR( K, IS-1 ) )+ + $ ABS( VR( K, IS ) ) ) + 100 CONTINUE +* + REMAX = ONE / EMAX + CALL SSCAL( KI, REMAX, VR( 1, IS-1 ), 1 ) + CALL SSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 110 K = KI + 1, N + VR( K, IS-1 ) = ZERO + VR( K, IS ) = ZERO + 110 CONTINUE +* + ELSE +* + IF( KI.GT.2 ) THEN + CALL SGEMV( 'N', N, KI-2, ONE, VR, LDVR, + $ WORK( 1+N ), 1, WORK( KI-1+N ), + $ VR( 1, KI-1 ), 1 ) + CALL SGEMV( 'N', N, KI-2, ONE, VR, LDVR, + $ WORK( 1+N2 ), 1, WORK( KI+N2 ), + $ VR( 1, KI ), 1 ) + ELSE + CALL SSCAL( N, WORK( KI-1+N ), VR( 1, KI-1 ), 1 ) + CALL SSCAL( N, WORK( KI+N2 ), VR( 1, KI ), 1 ) + END IF +* + EMAX = ZERO + DO 120 K = 1, N + EMAX = MAX( EMAX, ABS( VR( K, KI-1 ) )+ + $ ABS( VR( K, KI ) ) ) + 120 CONTINUE + REMAX = ONE / EMAX + CALL SSCAL( N, REMAX, VR( 1, KI-1 ), 1 ) + CALL SSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF + END IF +* + IS = IS - 1 + IF( IP.NE.0 ) + $ IS = IS - 1 
+ 130 CONTINUE + IF( IP.EQ.1 ) + $ IP = 0 + IF( IP.EQ.-1 ) + $ IP = 1 + 140 CONTINUE + END IF +* + IF( LEFTV ) THEN +* +* Compute left eigenvectors. +* + IP = 0 + IS = 1 + DO 260 KI = 1, N +* + IF( IP.EQ.-1 ) + $ GO TO 250 + IF( KI.EQ.N ) + $ GO TO 150 + IF( T( KI+1, KI ).EQ.ZERO ) + $ GO TO 150 + IP = 1 +* + 150 CONTINUE + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 250 + END IF +* +* Compute the KI-th eigenvalue (WR,WI). +* + WR = T( KI, KI ) + WI = ZERO + IF( IP.NE.0 ) + $ WI = SQRT( ABS( T( KI, KI+1 ) ) )* + $ SQRT( ABS( T( KI+1, KI ) ) ) + SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM ) +* + IF( IP.EQ.0 ) THEN +* +* Real left eigenvector. +* + WORK( KI+N ) = ONE +* +* Form right-hand side +* + DO 160 K = KI + 1, N + WORK( K+N ) = -T( KI, K ) + 160 CONTINUE +* +* Solve the quasi-triangular system: +* (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK +* + VMAX = ONE + VCRIT = BIGNUM +* + JNXT = KI + 1 + DO 170 J = KI + 1, N + IF( J.LT.JNXT ) + $ GO TO 170 + J1 = J + J2 = J + JNXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side. +* + IF( WORK( J ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ SDOT( J-KI-1, T( KI+1, J ), 1, + $ WORK( KI+1+N ), 1 ) +* +* Solve (T(J,J)-WR)'*X = WORK +* + CALL SLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) + VMAX = MAX( ABS( WORK( J+N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + ELSE +* +* 2-by-2 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side. 
+* + BETA = MAX( WORK( J ), WORK( J+1 ) ) + IF( BETA.GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ SDOT( J-KI-1, T( KI+1, J ), 1, + $ WORK( KI+1+N ), 1 ) +* + WORK( J+1+N ) = WORK( J+1+N ) - + $ SDOT( J-KI-1, T( KI+1, J+1 ), 1, + $ WORK( KI+1+N ), 1 ) +* +* Solve +* [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) +* [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) + WORK( J+1+N ) = X( 2, 1 ) +* + VMAX = MAX( ABS( WORK( J+N ) ), + $ ABS( WORK( J+1+N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + END IF + 170 CONTINUE +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN + CALL SCOPY( N-KI+1, WORK( KI+N ), 1, VL( KI, IS ), 1 ) +* + II = ISAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1 + REMAX = ONE / ABS( VL( II, IS ) ) + CALL SSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) +* + DO 180 K = 1, KI - 1 + VL( K, IS ) = ZERO + 180 CONTINUE +* + ELSE +* + IF( KI.LT.N ) + $ CALL SGEMV( 'N', N, N-KI, ONE, VL( 1, KI+1 ), LDVL, + $ WORK( KI+1+N ), 1, WORK( KI+N ), + $ VL( 1, KI ), 1 ) +* + II = ISAMAX( N, VL( 1, KI ), 1 ) + REMAX = ONE / ABS( VL( II, KI ) ) + CALL SSCAL( N, REMAX, VL( 1, KI ), 1 ) +* + END IF +* + ELSE +* +* Complex left eigenvector. +* +* Initial solve: +* ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. 
+* ((T(KI+1,KI) T(KI+1,KI+1)) ) +* + IF( ABS( T( KI, KI+1 ) ).GE.ABS( T( KI+1, KI ) ) ) THEN + WORK( KI+N ) = WI / T( KI, KI+1 ) + WORK( KI+1+N2 ) = ONE + ELSE + WORK( KI+N ) = ONE + WORK( KI+1+N2 ) = -WI / T( KI+1, KI ) + END IF + WORK( KI+1+N ) = ZERO + WORK( KI+N2 ) = ZERO +* +* Form right-hand side +* + DO 190 K = KI + 2, N + WORK( K+N ) = -WORK( KI+N )*T( KI, K ) + WORK( K+N2 ) = -WORK( KI+1+N2 )*T( KI+1, K ) + 190 CONTINUE +* +* Solve complex quasi-triangular system: +* ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 +* + VMAX = ONE + VCRIT = BIGNUM +* + JNXT = KI + 2 + DO 200 J = KI + 2, N + IF( J.LT.JNXT ) + $ GO TO 200 + J1 = J + J2 = J + JNXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* +* Scale if necessary to avoid overflow when +* forming the right-hand side elements. +* + IF( WORK( J ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + CALL SSCAL( N-KI+1, REC, WORK( KI+N2 ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N ), 1 ) + WORK( J+N2 ) = WORK( J+N2 ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N2 ), 1 ) +* +* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 +* + CALL SLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ -WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + CALL SSCAL( N-KI+1, SCALE, WORK( KI+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) + VMAX = MAX( ABS( WORK( J+N ) ), + $ ABS( WORK( J+N2 ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + ELSE +* +* 2-by-2 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side elements. 
+* + BETA = MAX( WORK( J ), WORK( J+1 ) ) + IF( BETA.GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + CALL SSCAL( N-KI+1, REC, WORK( KI+N2 ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N ), 1 ) +* + WORK( J+N2 ) = WORK( J+N2 ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N2 ), 1 ) +* + WORK( J+1+N ) = WORK( J+1+N ) - + $ SDOT( J-KI-2, T( KI+2, J+1 ), 1, + $ WORK( KI+2+N ), 1 ) +* + WORK( J+1+N2 ) = WORK( J+1+N2 ) - + $ SDOT( J-KI-2, T( KI+2, J+1 ), 1, + $ WORK( KI+2+N2 ), 1 ) +* +* Solve 2-by-2 complex linear equation +* ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B +* ([T(j+1,j) T(j+1,j+1)] ) +* + CALL SLALN2( .TRUE., 2, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ -WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + CALL SSCAL( N-KI+1, SCALE, WORK( KI+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) + WORK( J+1+N ) = X( 2, 1 ) + WORK( J+1+N2 ) = X( 2, 2 ) + VMAX = MAX( ABS( X( 1, 1 ) ), ABS( X( 1, 2 ) ), + $ ABS( X( 2, 1 ) ), ABS( X( 2, 2 ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + END IF + 200 CONTINUE +* +* Copy the vector x or Q*x to VL and normalize. 
+* + IF( .NOT.OVER ) THEN + CALL SCOPY( N-KI+1, WORK( KI+N ), 1, VL( KI, IS ), 1 ) + CALL SCOPY( N-KI+1, WORK( KI+N2 ), 1, VL( KI, IS+1 ), + $ 1 ) +* + EMAX = ZERO + DO 220 K = KI, N + EMAX = MAX( EMAX, ABS( VL( K, IS ) )+ + $ ABS( VL( K, IS+1 ) ) ) + 220 CONTINUE + REMAX = ONE / EMAX + CALL SSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) + CALL SSCAL( N-KI+1, REMAX, VL( KI, IS+1 ), 1 ) +* + DO 230 K = 1, KI - 1 + VL( K, IS ) = ZERO + VL( K, IS+1 ) = ZERO + 230 CONTINUE + ELSE + IF( KI.LT.N-1 ) THEN + CALL SGEMV( 'N', N, N-KI-1, ONE, VL( 1, KI+2 ), + $ LDVL, WORK( KI+2+N ), 1, WORK( KI+N ), + $ VL( 1, KI ), 1 ) + CALL SGEMV( 'N', N, N-KI-1, ONE, VL( 1, KI+2 ), + $ LDVL, WORK( KI+2+N2 ), 1, + $ WORK( KI+1+N2 ), VL( 1, KI+1 ), 1 ) + ELSE + CALL SSCAL( N, WORK( KI+N ), VL( 1, KI ), 1 ) + CALL SSCAL( N, WORK( KI+1+N2 ), VL( 1, KI+1 ), 1 ) + END IF +* + EMAX = ZERO + DO 240 K = 1, N + EMAX = MAX( EMAX, ABS( VL( K, KI ) )+ + $ ABS( VL( K, KI+1 ) ) ) + 240 CONTINUE + REMAX = ONE / EMAX + CALL SSCAL( N, REMAX, VL( 1, KI ), 1 ) + CALL SSCAL( N, REMAX, VL( 1, KI+1 ), 1 ) +* + END IF +* + END IF +* + IS = IS + 1 + IF( IP.NE.0 ) + $ IS = IS + 1 + 250 CONTINUE + IF( IP.EQ.-1 ) + $ IP = 0 + IF( IP.EQ.1 ) + $ IP = -1 +* + 260 CONTINUE +* + END IF +* + RETURN +* +* End of STREVC +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/strexc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/strexc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,345 @@ + SUBROUTINE STREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, WORK, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER COMPQ + INTEGER IFST, ILST, INFO, LDQ, LDT, N +* .. +* .. Array Arguments .. + REAL Q( LDQ, * ), T( LDT, * ), WORK( * ) +* .. 
+* +* Purpose +* ======= +* +* STREXC reorders the real Schur factorization of a real matrix +* A = Q*T*Q**T, so that the diagonal block of T with row index IFST is +* moved to row ILST. +* +* The real Schur form T is reordered by an orthogonal similarity +* transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors +* is updated by postmultiplying it with Z. +* +* T must be in Schur canonical form (as returned by SHSEQR), that is, +* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +* 2-by-2 diagonal block has its diagonal elements equal and its +* off-diagonal elements of opposite sign. +* +* Arguments +* ========= +* +* COMPQ (input) CHARACTER*1 +* = 'V': update the matrix Q of Schur vectors; +* = 'N': do not update Q. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) REAL array, dimension (LDT,N) +* On entry, the upper quasi-triangular matrix T, in Schur +* canonical form. +* On exit, the reordered upper quasi-triangular matrix, again +* in Schur canonical form. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) REAL array, dimension (LDQ,N) +* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +* On exit, if COMPQ = 'V', Q has been postmultiplied by the +* orthogonal transformation matrix Z which reorders T. +* If COMPQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* IFST (input/output) INTEGER +* ILST (input/output) INTEGER +* Specify the reordering of the diagonal blocks of T. +* The block with row index IFST is moved to row ILST, by a +* sequence of transpositions between adjacent blocks. +* On exit, if IFST pointed on entry to the second row of a +* 2-by-2 block, it is changed to point to the first row; ILST +* always points to the first row of the block in its final +* position (which may differ from its input value by +1 or -1). 
+* 1 <= IFST <= N; 1 <= ILST <= N. +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1: two adjacent blocks were too close to swap (the problem +* is very ill-conditioned); T may have been partially +* reordered, and ILST points to the first row of the +* current position of the block being moved. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTQ + INTEGER HERE, NBF, NBL, NBNEXT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLAEXC, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input arguments. +* + INFO = 0 + WANTQ = LSAME( COMPQ, 'V' ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( COMPQ, 'N' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN + INFO = -7 + ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STREXC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Determine the first row of specified block +* and find out it is 1 by 1 or 2 by 2. +* + IF( IFST.GT.1 ) THEN + IF( T( IFST, IFST-1 ).NE.ZERO ) + $ IFST = IFST - 1 + END IF + NBF = 1 + IF( IFST.LT.N ) THEN + IF( T( IFST+1, IFST ).NE.ZERO ) + $ NBF = 2 + END IF +* +* Determine the first row of the final block +* and find out it is 1 by 1 or 2 by 2. 
+* + IF( ILST.GT.1 ) THEN + IF( T( ILST, ILST-1 ).NE.ZERO ) + $ ILST = ILST - 1 + END IF + NBL = 1 + IF( ILST.LT.N ) THEN + IF( T( ILST+1, ILST ).NE.ZERO ) + $ NBL = 2 + END IF +* + IF( IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* +* Update ILST +* + IF( NBF.EQ.2 .AND. NBL.EQ.1 ) + $ ILST = ILST - 1 + IF( NBF.EQ.1 .AND. NBL.EQ.2 ) + $ ILST = ILST + 1 +* + HERE = IFST +* + 10 CONTINUE +* +* Swap block with next one below +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1 by 1 or 2 by 2 +* + NBNEXT = 1 + IF( HERE+NBF+1.LE.N ) THEN + IF( T( HERE+NBF+1, HERE+NBF ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, NBF, NBNEXT, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + NBNEXT +* +* Test if 2 by 2 block breaks into two 1 by 1 blocks +* + IF( NBF.EQ.2 ) THEN + IF( T( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1 by 1 blocks each of which +* must be swapped individually +* + NBNEXT = 1 + IF( HERE+3.LE.N ) THEN + IF( T( HERE+3, HERE+2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE+1, 1, NBNEXT, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1 by 1 blocks, no problems possible +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, NBNEXT, + $ WORK, INFO ) + HERE = HERE + 1 + ELSE +* +* Recompute NBNEXT in case 2 by 2 split +* + IF( T( HERE+2, HERE+1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2 by 2 Block did not split +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, + $ NBNEXT, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 2 + ELSE +* +* 2 by 2 Block did split +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, 1, + $ WORK, INFO ) + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE+1, 1, 1, + $ WORK, INFO ) + HERE = HERE + 2 + END IF + END IF + END IF + IF( HERE.LT.ILST ) + $ 
GO TO 10 +* + ELSE +* + HERE = IFST + 20 CONTINUE +* +* Swap block with next one above +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1 by 1 or 2 by 2 +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( T( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-NBNEXT, NBNEXT, + $ NBF, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - NBNEXT +* +* Test if 2 by 2 block breaks into two 1 by 1 blocks +* + IF( NBF.EQ.2 ) THEN + IF( T( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1 by 1 blocks each of which +* must be swapped individually +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( T( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-NBNEXT, NBNEXT, + $ 1, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1 by 1 blocks, no problems possible +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, NBNEXT, 1, + $ WORK, INFO ) + HERE = HERE - 1 + ELSE +* +* Recompute NBNEXT in case 2 by 2 split +* + IF( T( HERE, HERE-1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2 by 2 Block did not split +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-1, 2, 1, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 2 + ELSE +* +* 2 by 2 Block did split +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, 1, + $ WORK, INFO ) + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-1, 1, 1, + $ WORK, INFO ) + HERE = HERE - 2 + END IF + END IF + END IF + IF( HERE.GT.ILST ) + $ GO TO 20 + END IF + ILST = HERE +* + RETURN +* +* End of STREXC +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/strsen.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/strsen.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,461 @@ + SUBROUTINE STRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, WR, WI, + $ M, S, SEP, WORK, LWORK, 
IWORK, LIWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* Modified to call SLACN2 in place of SLACON, 7 Feb 03, SJH. +* +* .. Scalar Arguments .. + CHARACTER COMPQ, JOB + INTEGER INFO, LDQ, LDT, LIWORK, LWORK, M, N + REAL S, SEP +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + REAL Q( LDQ, * ), T( LDT, * ), WI( * ), WORK( * ), + $ WR( * ) +* .. +* +* Purpose +* ======= +* +* STRSEN reorders the real Schur factorization of a real matrix +* A = Q*T*Q**T, so that a selected cluster of eigenvalues appears in +* the leading diagonal blocks of the upper quasi-triangular matrix T, +* and the leading columns of Q form an orthonormal basis of the +* corresponding right invariant subspace. +* +* Optionally the routine computes the reciprocal condition numbers of +* the cluster of eigenvalues and/or the invariant subspace. +* +* T must be in Schur canonical form (as returned by SHSEQR), that is, +* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +* 2-by-2 diagonal block has its diagonal elements equal and its +* off-diagonal elements of opposite sign. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies whether condition numbers are required for the +* cluster of eigenvalues (S) or the invariant subspace (SEP): +* = 'N': none; +* = 'E': for eigenvalues only (S); +* = 'V': for invariant subspace only (SEP); +* = 'B': for both eigenvalues and invariant subspace (S and +* SEP). +* +* COMPQ (input) CHARACTER*1 +* = 'V': update the matrix Q of Schur vectors; +* = 'N': do not update Q. +* +* SELECT (input) LOGICAL array, dimension (N) +* SELECT specifies the eigenvalues in the selected cluster. To +* select a real eigenvalue w(j), SELECT(j) must be set to +* .TRUE.. 
To select a complex conjugate pair of eigenvalues +* w(j) and w(j+1), corresponding to a 2-by-2 diagonal block, +* either SELECT(j) or SELECT(j+1) or both must be set to +* .TRUE.; a complex conjugate pair of eigenvalues must be +* either both included in the cluster or both excluded. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) REAL array, dimension (LDT,N) +* On entry, the upper quasi-triangular matrix T, in Schur +* canonical form. +* On exit, T is overwritten by the reordered matrix T, again in +* Schur canonical form, with the selected eigenvalues in the +* leading diagonal blocks. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) REAL array, dimension (LDQ,N) +* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +* On exit, if COMPQ = 'V', Q has been postmultiplied by the +* orthogonal transformation matrix which reorders T; the +* leading M columns of Q form an orthonormal basis for the +* specified invariant subspace. +* If COMPQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= 1; and if COMPQ = 'V', LDQ >= N. +* +* WR (output) REAL array, dimension (N) +* WI (output) REAL array, dimension (N) +* The real and imaginary parts, respectively, of the reordered +* eigenvalues of T. The eigenvalues are stored in the same +* order as on the diagonal of T, with WR(i) = T(i,i) and, if +* T(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) > 0 and +* WI(i+1) = -WI(i). Note that if a complex eigenvalue is +* sufficiently ill-conditioned, then its value may differ +* significantly from its value before reordering. +* +* M (output) INTEGER +* The dimension of the specified invariant subspace. +* 0 < = M <= N. +* +* S (output) REAL +* If JOB = 'E' or 'B', S is a lower bound on the reciprocal +* condition number for the selected cluster of eigenvalues. 
+* S cannot underestimate the true reciprocal condition number +* by more than a factor of sqrt(N). If M = 0 or N, S = 1. +* If JOB = 'N' or 'V', S is not referenced. +* +* SEP (output) REAL +* If JOB = 'V' or 'B', SEP is the estimated reciprocal +* condition number of the specified invariant subspace. If +* M = 0 or N, SEP = norm(T). +* If JOB = 'N' or 'E', SEP is not referenced. +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If JOB = 'N', LWORK >= max(1,N); +* if JOB = 'E', LWORK >= max(1,M*(N-M)); +* if JOB = 'V' or 'B', LWORK >= max(1,2*M*(N-M)). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If JOB = 'N' or 'E', LIWORK >= 1; +* if JOB = 'V' or 'B', LIWORK >= max(1,M*(N-M)). +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1: reordering of T failed because some eigenvalues are too +* close to separate (the problem is very ill-conditioned); +* T may have been partially reordered, and WR and WI +* contain the eigenvalues in the same order as in T; S and +* SEP (if requested) are set to zero. 
+* +* Further Details +* =============== +* +* STRSEN first collects the selected eigenvalues by computing an +* orthogonal transformation Z to move them to the top left corner of T. +* In other words, the selected eigenvalues are the eigenvalues of T11 +* in: +* +* Z'*T*Z = ( T11 T12 ) n1 +* ( 0 T22 ) n2 +* n1 n2 +* +* where N = n1+n2 and Z' means the transpose of Z. The first n1 columns +* of Z span the specified invariant subspace of T. +* +* If T has been obtained from the real Schur factorization of a matrix +* A = Q*T*Q', then the reordered real Schur factorization of A is given +* by A = (Q*Z)*(Z'*T*Z)*(Q*Z)', and the first n1 columns of Q*Z span +* the corresponding invariant subspace of A. +* +* The reciprocal condition number of the average of the eigenvalues of +* T11 may be returned in S. S lies between 0 (very badly conditioned) +* and 1 (very well conditioned). It is computed as follows. First we +* compute R so that +* +* P = ( I R ) n1 +* ( 0 0 ) n2 +* n1 n2 +* +* is the projector on the invariant subspace associated with T11. +* R is the solution of the Sylvester equation: +* +* T11*R - R*T22 = T12. +* +* Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote +* the two-norm of M. Then S is computed as the lower bound +* +* (1 + F-norm(R)**2)**(-1/2) +* +* on the reciprocal of 2-norm(P), the true reciprocal condition number. +* S cannot underestimate 1 / 2-norm(P) by more than a factor of +* sqrt(N). +* +* An approximate error bound for the computed average of the +* eigenvalues of T11 is +* +* EPS * norm(T) / S +* +* where EPS is the machine precision. +* +* The reciprocal condition number of the right invariant subspace +* spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP. 
+* SEP is defined as the separation of T11 and T22: +* +* sep( T11, T22 ) = sigma-min( C ) +* +* where sigma-min(C) is the smallest singular value of the +* n1*n2-by-n1*n2 matrix +* +* C = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) ) +* +* I(m) is an m by m identity matrix, and kprod denotes the Kronecker +* product. We estimate sigma-min(C) by the reciprocal of an estimate of +* the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C) +* cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2). +* +* When SEP is small, small changes in T can cause large changes in +* the invariant subspace. An approximate bound on the maximum angular +* error in the computed right invariant subspace is +* +* EPS * norm(T) / SEP +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, PAIR, SWAP, WANTBH, WANTQ, WANTS, + $ WANTSP + INTEGER IERR, K, KASE, KK, KS, LIWMIN, LWMIN, N1, N2, + $ NN + REAL EST, RNORM, SCALE +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLANGE + EXTERNAL LSAME, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SLACPY, STREXC, STRSYL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH + WANTQ = LSAME( COMPQ, 'V' ) +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.WANTS .AND. .NOT.WANTSP ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. 
LDQ.LT.N ) ) THEN + INFO = -8 + ELSE +* +* Set M to the dimension of the specified invariant subspace, +* and test LWORK and LIWORK. +* + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( T( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE +* + N1 = M + N2 = N - M + NN = N1*N2 +* + IF( WANTSP ) THEN + LWMIN = MAX( 1, 2*NN ) + LIWMIN = MAX( 1, NN ) + ELSE IF( LSAME( JOB, 'N' ) ) THEN + LWMIN = MAX( 1, N ) + LIWMIN = 1 + ELSE IF( LSAME( JOB, 'E' ) ) THEN + LWMIN = MAX( 1, NN ) + LIWMIN = 1 + END IF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTS ) + $ S = ONE + IF( WANTSP ) + $ SEP = SLANGE( '1', N, N, T, LDT, WORK ) + GO TO 40 + END IF +* +* Collect the selected blocks at the top-left corner of T. +* + KS = 0 + PAIR = .FALSE. + DO 20 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + SWAP = SELECT( K ) + IF( K.LT.N ) THEN + IF( T( K+1, K ).NE.ZERO ) THEN + PAIR = .TRUE. + SWAP = SWAP .OR. SELECT( K+1 ) + END IF + END IF + IF( SWAP ) THEN + KS = KS + 1 +* +* Swap the K-th block to position KS. +* + IERR = 0 + KK = K + IF( K.NE.KS ) + $ CALL STREXC( COMPQ, N, T, LDT, Q, LDQ, KK, KS, WORK, + $ IERR ) + IF( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN +* +* Blocks too close to swap: exit. 
+* + INFO = 1 + IF( WANTS ) + $ S = ZERO + IF( WANTSP ) + $ SEP = ZERO + GO TO 40 + END IF + IF( PAIR ) + $ KS = KS + 1 + END IF + END IF + 20 CONTINUE +* + IF( WANTS ) THEN +* +* Solve Sylvester equation for R: +* +* T11*R - R*T22 = scale*T12 +* + CALL SLACPY( 'F', N1, N2, T( 1, N1+1 ), LDT, WORK, N1 ) + CALL STRSYL( 'N', 'N', -1, N1, N2, T, LDT, T( N1+1, N1+1 ), + $ LDT, WORK, N1, SCALE, IERR ) +* +* Estimate the reciprocal of the condition number of the cluster +* of eigenvalues. +* + RNORM = SLANGE( 'F', N1, N2, WORK, N1, WORK ) + IF( RNORM.EQ.ZERO ) THEN + S = ONE + ELSE + S = SCALE / ( SQRT( SCALE*SCALE / RNORM+RNORM )* + $ SQRT( RNORM ) ) + END IF + END IF +* + IF( WANTSP ) THEN +* +* Estimate sep(T11,T22). +* + EST = ZERO + KASE = 0 + 30 CONTINUE + CALL SLACN2( NN, WORK( NN+1 ), WORK, IWORK, EST, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve T11*R - R*T22 = scale*X. +* + CALL STRSYL( 'N', 'N', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + ELSE +* +* Solve T11'*R - R*T22' = scale*X. +* + CALL STRSYL( 'T', 'T', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + END IF + GO TO 30 + END IF +* + SEP = SCALE / EST + END IF +* + 40 CONTINUE +* +* Store the output eigenvalues in WR and WI. +* + DO 50 K = 1, N + WR( K ) = T( K, K ) + WI( K ) = ZERO + 50 CONTINUE + DO 60 K = 1, N - 1 + IF( T( K+1, K ).NE.ZERO ) THEN + WI( K ) = SQRT( ABS( T( K, K+1 ) ) )* + $ SQRT( ABS( T( K+1, K ) ) ) + WI( K+1 ) = -WI( K ) + END IF + 60 CONTINUE +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of STRSEN +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/strsyl.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/strsyl.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,913 @@ + SUBROUTINE STRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, + $ LDC, SCALE, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER TRANA, TRANB + INTEGER INFO, ISGN, LDA, LDB, LDC, M, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* STRSYL solves the real Sylvester matrix equation: +* +* op(A)*X + X*op(B) = scale*C or +* op(A)*X - X*op(B) = scale*C, +* +* where op(A) = A or A**T, and A and B are both upper quasi- +* triangular. A is M-by-M and B is N-by-N; the right hand side C and +* the solution X are M-by-N; and scale is an output scale factor, set +* <= 1 to avoid overflow in X. +* +* A and B must be in Schur canonical form (as returned by SHSEQR), that +* is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; +* each 2-by-2 diagonal block has its diagonal elements equal and its +* off-diagonal elements of opposite sign. +* +* Arguments +* ========= +* +* TRANA (input) CHARACTER*1 +* Specifies the option op(A): +* = 'N': op(A) = A (No transpose) +* = 'T': op(A) = A**T (Transpose) +* = 'C': op(A) = A**H (Conjugate transpose = Transpose) +* +* TRANB (input) CHARACTER*1 +* Specifies the option op(B): +* = 'N': op(B) = B (No transpose) +* = 'T': op(B) = B**T (Transpose) +* = 'C': op(B) = B**H (Conjugate transpose = Transpose) +* +* ISGN (input) INTEGER +* Specifies the sign in the equation: +* = +1: solve op(A)*X + X*op(B) = scale*C +* = -1: solve op(A)*X - X*op(B) = scale*C +* +* M (input) INTEGER +* The order of the matrix A, and the number of rows in the +* matrices X and C. M >= 0. +* +* N (input) INTEGER +* The order of the matrix B, and the number of columns in the +* matrices X and C. N >= 0. +* +* A (input) REAL array, dimension (LDA,M) +* The upper quasi-triangular matrix A, in Schur canonical form. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input) REAL array, dimension (LDB,N) +* The upper quasi-triangular matrix B, in Schur canonical form. 
+* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N right hand side matrix C. +* On exit, C is overwritten by the solution matrix X. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M) +* +* SCALE (output) REAL +* The scale factor, scale, set <= 1 to avoid overflow in X. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1: A and B have common or very close eigenvalues; perturbed +* values were used to solve the equation (but the matrices +* A and B are unchanged). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRNA, NOTRNB + INTEGER IERR, J, K, K1, K2, KNEXT, L, L1, L2, LNEXT + REAL A11, BIGNUM, DA11, DB, EPS, SCALOC, SGN, SMIN, + $ SMLNUM, SUML, SUMR, XNORM +* .. +* .. Local Arrays .. + REAL DUM( 1 ), VEC( 2, 2 ), X( 2, 2 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT, SLAMCH, SLANGE + EXTERNAL LSAME, SDOT, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SLABAD, SLALN2, SLASY2, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Decode and Test input parameters +* + NOTRNA = LSAME( TRANA, 'N' ) + NOTRNB = LSAME( TRANB, 'N' ) +* + INFO = 0 + IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT. + $ LSAME( TRANA, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT. + $ LSAME( TRANB, 'C' ) ) THEN + INFO = -2 + ELSE IF( ISGN.NE.1 .AND. 
ISGN.NE.-1 ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STRSYL', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Set constants to control overflow
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SMLNUM*REAL( M*N ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+      SMIN = MAX( SMLNUM, EPS*SLANGE( 'M', M, M, A, LDA, DUM ),
+     $       EPS*SLANGE( 'M', N, N, B, LDB, DUM ) )
+*
+      SCALE = ONE
+      SGN = ISGN
+*
+      IF( NOTRNA .AND. NOTRNB ) THEN
+*
+*        Solve    A*X + ISGN*X*B = scale*C.
+*
+*        The (K,L)th block of X is determined starting from
+*        bottom-left corner column by column by
+*
+*         A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
+*
+*        Where
+*                  M                         L-1
+*        R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)].
+*                I=K+1                       J=1
+*
+*        Start column loop (index = L)
+*        L1 (L2): column index of the first (last) row of X(K,L).
+         LNEXT = 1
+         DO 70 L = 1, N
+            IF( L.LT.LNEXT )
+     $         GO TO 70
+            IF( L.EQ.N ) THEN
+               L1 = L
+               L2 = L
+            ELSE
+               IF( B( L+1, L ).NE.ZERO ) THEN
+                  L1 = L
+                  L2 = L + 1
+                  LNEXT = L + 2
+               ELSE
+                  L1 = L
+                  L2 = L
+                  LNEXT = L + 1
+               END IF
+            END IF
+*
+*           Start row loop (index = K)
+*           K1 (K2): row index of the first (last) row of X(K,L).
+            KNEXT = M
+            DO 60 K = M, 1, -1
+               IF( K.GT.KNEXT )
+     $            GO TO 60
+               IF( K.EQ.1 ) THEN
+                  K1 = K
+                  K2 = K
+               ELSE
+                  IF( A( K, K-1 ).NE.ZERO ) THEN
+                     K1 = K - 1
+                     K2 = K
+                     KNEXT = K - 2
+                  ELSE
+                     K1 = K
+                     K2 = K
+                     KNEXT = K - 1
+                  END IF
+               END IF
+*
+               IF( L1.EQ.L2 .AND. 
K1.EQ.K2 ) THEN + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 10 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 10 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 20 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 20 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. 
K1.EQ.K2 ) THEN +* + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L2 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 40 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 40 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL SLASY2( .FALSE., .FALSE., ISGN, 2, 2, + $ A( K1, K1 ), LDA, B( L1, L1 ), LDB, VEC, + $ 2, SCALOC, X, 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 50 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 50 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 60 CONTINUE +* + 70 
CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN +* +* Solve A' *X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* upper-left corner column by column by +* +* A(K,K)'*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 L-1 +* R(K,L) = SUM [A(I,K)'*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] +* I=1 J=1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = 1 + DO 130 L = 1, N + IF( L.LT.LNEXT ) + $ GO TO 130 + IF( L.EQ.N ) THEN + L1 = L + L2 = L + ELSE + IF( B( L+1, L ).NE.ZERO ) THEN + L1 = L + L2 = L + 1 + LNEXT = L + 2 + ELSE + L1 = L + L2 = L + LNEXT = L + 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = 1 + DO 120 K = 1, M + IF( K.LT.KNEXT ) + $ GO TO 120 + IF( K.EQ.M ) THEN + K1 = K + K2 = K + ELSE + IF( A( K+1, K ).NE.ZERO ) THEN + K1 = K + K2 = K + 1 + KNEXT = K + 2 + ELSE + K1 = K + K2 = K + KNEXT = K + 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 80 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 80 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. 
K1.NE.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 90 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 90 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 100 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 100 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. 
K1.NE.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL SLASY2( .TRUE., .FALSE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 110 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 110 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A'*X + ISGN*X*B' = scale*C. +* +* The (K,L)th block of X is determined starting from +* top-right corner column by column by +* +* A(K,K)'*X(K,L) + ISGN*X(K,L)*B(L,L)' = C(K,L) - R(K,L) +* +* Where +* K-1 N +* R(K,L) = SUM [A(I,K)'*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)']. 
+* I=1 J=L+1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = N + DO 190 L = N, 1, -1 + IF( L.GT.LNEXT ) + $ GO TO 190 + IF( L.EQ.1 ) THEN + L1 = L + L2 = L + ELSE + IF( B( L, L-1 ).NE.ZERO ) THEN + L1 = L - 1 + L2 = L + LNEXT = L - 2 + ELSE + L1 = L + L2 = L + LNEXT = L - 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = 1 + DO 180 K = 1, M + IF( K.LT.KNEXT ) + $ GO TO 180 + IF( K.EQ.M ) THEN + K1 = K + K2 = K + ELSE + IF( A( K+1, K ).NE.ZERO ) THEN + K1 = K + K2 = K + 1 + KNEXT = K + 2 + ELSE + K1 = K + K2 = K + KNEXT = K + 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L1, C( K1, MIN( L1+1, N ) ), LDC, + $ B( L1, MIN( L1+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 140 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 140 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. 
K1.NE.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 150 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 150 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 160 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 160 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. 
K1.NE.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN(L2+1, N ) ), LDB ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL SLASY2( .TRUE., .TRUE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 170 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 170 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 180 CONTINUE + 190 CONTINUE +* + ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B' = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-right corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)' = C(K,L) - R(K,L) +* +* Where +* M N +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)']. 
+* I=K+1 J=L+1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = N + DO 250 L = N, 1, -1 + IF( L.GT.LNEXT ) + $ GO TO 250 + IF( L.EQ.1 ) THEN + L1 = L + L2 = L + ELSE + IF( B( L, L-1 ).NE.ZERO ) THEN + L1 = L - 1 + L2 = L + LNEXT = L - 2 + ELSE + L1 = L + L2 = L + LNEXT = L - 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = M + DO 240 K = M, 1, -1 + IF( K.GT.KNEXT ) + $ GO TO 240 + IF( K.EQ.1 ) THEN + K1 = K + K2 = K + ELSE + IF( A( K, K-1 ).NE.ZERO ) THEN + K1 = K - 1 + K2 = K + KNEXT = K - 2 + ELSE + K1 = K + K2 = K + KNEXT = K - 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = SDOT( M-K1, A( K1, MIN(K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L1, C( K1, MIN( L1+1, N ) ), LDC, + $ B( L1, MIN( L1+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 200 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 200 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. 
K1.NE.K2 ) THEN +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 210 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 210 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L2 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 220 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 220 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. 
K1.NE.K2 ) THEN +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL SLASY2( .FALSE., .TRUE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 230 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 230 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 240 CONTINUE + 250 CONTINUE +* + END IF +* + RETURN +* +* End of STRSYL +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/strti2.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/strti2.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,146 @@ + SUBROUTINE STRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* STRTI2 computes the inverse of a real upper or lower triangular +* matrix. 
+* +* This is the Level 2 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the triangular matrix A. If UPLO = 'U', the +* leading n by n upper triangular part of the array A contains +* the upper triangular matrix, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of the array A contains +* the lower triangular matrix, and the strictly upper +* triangular part of A is not referenced. If DIAG = 'U', the +* diagonal elements of A are also not referenced and are +* assumed to be 1. +* +* On exit, the (triangular) inverse of the original matrix, in +* the same storage format. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, STRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. 
.NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRTI2', -INFO ) + RETURN + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + DO 10 J = 1, N + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL STRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA, + $ A( 1, J ), 1 ) + CALL SSCAL( J-1, AJJ, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix. +* + DO 20 J = N, 1, -1 + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL STRMV( 'Lower', 'No transpose', DIAG, N-J, + $ A( J+1, J+1 ), LDA, A( J+1, J ), 1 ) + CALL SSCAL( N-J, AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of STRTI2 +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/strtri.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/strtri.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,176 @@ + SUBROUTINE STRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* STRTRI computes the inverse of a real upper or lower triangular +* matrix A. +* +* This is the Level 3 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. 
+* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the triangular matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of the array A contains +* the upper triangular matrix, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of the array A contains +* the lower triangular matrix, and the strictly upper +* triangular part of A is not referenced. If DIAG = 'U', the +* diagonal elements of A are also not referenced and are +* assumed to be 1. +* On exit, the (triangular) inverse of the original matrix, in +* the same storage format. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, A(i,i) is exactly zero. The triangular +* matrix is singular and its inverse can not be computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JB, NB, NN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL STRMM, STRSM, STRTI2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity if non-unit. 
+* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + INFO = 0 + END IF +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL STRTI2( UPLO, DIAG, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix +* + DO 20 J = 1, N, NB + JB = MIN( NB, N-J+1 ) +* +* Compute rows 1:j-1 of current block column +* + CALL STRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1, + $ JB, ONE, A, LDA, A( 1, J ), LDA ) + CALL STRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1, + $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA ) +* +* Compute inverse of current diagonal block +* + CALL STRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO ) + 20 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 30 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) + IF( J+JB.LE.N ) THEN +* +* Compute rows j+jb:n of current block column +* + CALL STRMM( 'Left', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA, + $ A( J+JB, J ), LDA ) + CALL STRSM( 'Right', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, -ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF +* +* Compute inverse of current diagonal block +* + CALL STRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO ) + 30 CONTINUE + END IF + END IF +* + RETURN +* +* End of STRTRI +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/strtrs.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/strtrs.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,147 @@ + SUBROUTINE STRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. 
+* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* STRTRS solves a triangular system of the form +* +* A * X = B or A**T * X = B, +* +* where A is a triangular matrix of order N, and B is an N-by-NRHS +* matrix. A check is made to verify that A is nonsingular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, if INFO = 0, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). 
+* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element of A is zero, +* indicating that the matrix is singular and the solutions +* X have not been computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + END IF + INFO = 0 +* +* Solve A * x = b or A' * x = b. +* + CALL STRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, LDA, B, + $ LDB ) +* + RETURN +* +* End of STRTRS +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/lapack/stzrzf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/lapack/stzrzf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,244 @@ + SUBROUTINE STZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.1) -- +* Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* STZRZF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A +* to upper triangular form by means of orthogonal transformations. +* +* The upper trapezoidal matrix A is factored as +* +* A = ( R 0 ) * Z, +* +* where Z is an N-by-N orthogonal matrix and R is an M-by-M upper +* triangular matrix. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= M. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements M+1 to +* N of the first M rows of A, with the array TAU, represent the +* orthogonal matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. 
+* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), which is used to introduce zeros into +* the ( m - k + 1 )th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an ( n - m ) element vector. +* tau and z( k ) are chosen to annihilate the elements of the kth row +* of X. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A, such that the elements of z( k ) are +* in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IWS, KI, KK, LDWORK, LWKOPT, M1, MU, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLARZB, SLARZT, SLATRZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( M.EQ.0 .OR. M.EQ.N ) THEN + LWKOPT = 1 + ELSE +* +* Determine the block size. +* + NB = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, M ) .AND. 
.NOT.LQUERY ) THEN + INFO = -7 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STZRZF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.M ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.M ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.M .AND. NX.LT.M ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + M1 = MIN( M+1, N ) + KI = ( ( M-NX-1 ) / NB )*NB + KK = MIN( M, KI+NB ) +* + DO 20 I = M - KK + KI + 1, M - KK + 1, -NB + IB = MIN( M-I+1, NB ) +* +* Compute the TZ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL SLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ), + $ WORK ) + IF( I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . 
H(i+1) H(i) +* + CALL SLARZT( 'Backward', 'Rowwise', N-M, IB, A( I, M1 ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:i-1,i:n) from the right +* + CALL SLARZB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ), + $ LDA, WORK, LDWORK, A( 1, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 20 CONTINUE + MU = I + NB - 1 + ELSE + MU = M + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 ) + $ CALL SLATRZ( MU, N, N-M, A, LDA, TAU, WORK ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of STZRZF +* + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/misc/Makefile.in --- a/libcruft/misc/Makefile.in Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/misc/Makefile.in Sun Apr 27 22:34:17 2008 +0200 @@ -27,13 +27,15 @@ EXTERNAL_DISTFILES = $(DISTFILES) -FSRC = d1mach.f i1mach.f +FSRC = d1mach.f r1mach.f i1mach.f CSRC = machar.c f77-fcn.c lo-error.c cquit.c CXXSRC = f77-extern.cc quit.cc -MAKEDEPS := $(patsubst %.c, %.d, $(CSRC)) $(patsubst %.cc, %.d, $(CXXSRC)) +CEXTRA = smachar.c + +MAKEDEPS := $(patsubst %.c, %.d, $(CSRC) $(CEXTRA)) $(patsubst %.cc, %.d, $(CXXSRC)) INCLUDES := f77-fcn.h lo-error.h oct-dlldefs.h quit.h @@ -66,9 +68,15 @@ machar.o: $(srcdir)/machar.c $(XCC) -c $(CPPFLAGS) $(XALL_CFLAGS) -DDP $< -o $@ +smachar.o: $(srcdir)/machar.c + $(XCC) -c $(CPPFLAGS) $(XALL_CFLAGS) -DSP $< -o $@ + pic/machar.o: $(srcdir)/machar.c $(XCC) -c $(CPPFLAGS) $(CPICFLAG) $(XALL_CFLAGS) -DDP $< -o $@ +pic/smachar.o: $(srcdir)/machar.c + $(XCC) -c $(CPPFLAGS) $(CPICFLAG) $(XALL_CFLAGS) -DSP $< -o $@ + ifdef omit_deps .PHONY: $(MAKEDEPS) endif diff -r 45f5faba05a2 -r 82be108cc558 libcruft/misc/machar.c --- a/libcruft/misc/machar.c Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/misc/machar.c Sun Apr 27 22:34:17 2008 +0200 @@ -37,7 +37,7 @@ #define ABS(xxx) ((xxx>ZERO)?(xxx):(-xxx)) -void +static void rmachar(int *ibeta, int *it, int *irnd, int *ngrd, int *machep, int *negep, int *iexp, int *minexp, int *maxexp, REAL 
*eps, REAL *epsneg, REAL *xmin, REAL *xmax) @@ -368,10 +368,18 @@ #ifndef TEST +#ifdef SP +F77_RET_T +F77_FUNC (smachar, SMACHAR) (REAL *xmin, REAL *xmax, REAL *epsneg, + REAL *eps, REAL *log10_ibeta) +{ +#else F77_RET_T F77_FUNC (machar, MACHAR) (REAL *xmin, REAL *xmax, REAL *epsneg, REAL *eps, REAL *log10_ibeta) { +#endif + #if defined (_CRAY) // FIXME -- make machar work for the Cray too. diff -r 45f5faba05a2 -r 82be108cc558 libcruft/misc/r1mach.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/misc/r1mach.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,18 @@ + real function r1mach (i) + integer i + logical init + real rmach(5) + save init, rmach + data init /.false./ + if (.not. init) then + call smachar (rmach(1), rmach(2), rmach(3), rmach(4), rmach(5)) + init = .true. + endif + if (i .lt. 1 .or. i .gt. 5) goto 999 + r1mach = rmach(i) + return + 999 write(*,1999) i + 1999 format(' s1mach - i out of bounds', i10) + call xstopx (' ') + r1mach = 0 + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/Makefile.in --- a/libcruft/qrupdate/Makefile.in Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/qrupdate/Makefile.in Sun Apr 27 22:34:17 2008 +0200 @@ -38,7 +38,20 @@ dchdex.f zchdex.f \ dqrqhu.f zqrqhu.f \ dqrqhv.f zqrqhv.f \ - dqhqr.f zqhqr.f + dqhqr.f zqhqr.f \ + sch1up.f cch1up.f \ + sqrinc.f cqrinc.f \ + sqrdec.f cqrdec.f \ + sqrinr.f cqrinr.f \ + sqrder.f cqrder.f \ + sqrshc.f cqrshc.f \ + sqr1up.f cqr1up.f \ + sch1dn.f cch1dn.f \ + schinx.f cchinx.f \ + schdex.f cchdex.f \ + sqrqhu.f cqrqhu.f \ + sqrqhv.f cqrqhv.f \ + sqhqr.f cqhqr.f include $(TOPDIR)/Makeconf diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/cch1dn.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/cch1dn.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,81 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General 
Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine cch1dn(n,R,u,w,info) +c purpose: given an upper triangular matrix R that is a Cholesky +c factor of a hermitian positive definite matrix A, i.e. +c A = R'*R, this subroutine downdates R -> R1 so that +c R1'*R1 = A - u*u' +c (complex version) +c arguments: +c n (in) the order of matrix R +c R (io) on entry, the upper triangular matrix R +c on exit, the updated matrix R1 +c u (io) the vector determining the rank-1 update +c on exit, u is destroyed. +c w (w) a workspace vector of size n +c +c NOTE: the workspace vector is used to store the rotations +c so that R does not need to be traversed by rows. 
+c + integer n,info + complex R(n,n),u(n) + real w(n) + external ctrsv,clartg,scnrm2 + real rho,scnrm2 + complex crho,rr,ui,t + integer i,j + +c quick return if possible + if (n <= 0) return +c check for singularity of R + do i = 1,n + if (R(i,i) == 0e0) then + info = 2 + return + end if + end do +c form R' \ u + call ctrsv('U','C','N',n,R,n,u,1) + rho = scnrm2(n,u,1) +c check positive definiteness + rho = 1 - rho**2 + if (rho <= 0e0) then + info = 1 + return + end if + crho = sqrt(rho) +c eliminate R' \ u + do i = n,1,-1 + ui = u(i) +c generate next rotation + call clartg(crho,ui,w(i),u(i),rr) + crho = rr + end do +c apply rotations + do i = n,1,-1 + ui = 0e0 + do j = i,1,-1 + t = w(j)*ui + u(j)*R(j,i) + R(j,i) = w(j)*R(j,i) - conjg(u(j))*ui + ui = t + end do + end do + + info = 0 + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/cch1up.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/cch1up.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,56 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine cch1up(n,R,u,w) +c purpose: given an upper triangular matrix R that is a Cholesky +c factor of a hermitian positive definite matrix A, i.e. 
+c A = R'*R, this subroutine updates R -> R1 so that +c R1'*R1 = A + u*u' or A - u*u' +c (complex version) +c arguments: +c n (in) the order of matrix R +c R (io) on entry, the upper triangular matrix R +c on exit, the updated matrix R1 +c u (io) the vector determining the rank-1 update +c on exit, u is destroyed. +c w (w) a real workspace vector of size n +c +c NOTE: the workspace vector is used to store the rotations +c so that R does not need to be traversed by rows. +c + integer n + complex R(n,n),u(n) + real w(n) + external clartg + complex rr,ui,t + integer i,j + + do i = 1,n +c apply stored rotations, column-wise + ui = conjg(u(i)) + do j = 1,i-1 + t = w(j)*R(j,i) + u(j)*ui + ui = w(j)*ui - conjg(u(j))*R(j,i) + R(j,i) = t + end do +c generate next rotation + call clartg(R(i,i),ui,w(i),u(i),rr) + R(i,i) = rr + end do + end + diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/cchdex.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/cchdex.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,62 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + + subroutine cchdex(n,R,R1,j) +c purpose: given an upper triangular matrix R that is a Cholesky +c factor of a symmetric positive definite matrix A, i.e. 
+c A = R'*R, this subroutine updates R -> R1 so that +c R1'*R1 = A(jj,jj), where jj = [1:j-1,j+1:n+1]. +c (complex version) +c arguments: +c n (in) the order of matrix R +c R (in) the original upper trapezoidal matrix R +c R1 (out) the updated matrix R1 +c j (in) the position of the deleted row/column + integer n,j,info + complex R(n,n),R1(n-1,n-1) + real c + complex Qdum,s,rr + external xerbla,clacpy,cqhqr,clartg + +c quick return if possible + if (n == 1) return + +c check arguments + info = 0 + if (n <= 0) then + info = 1 + else if (j < 1 .or. j > n) then + info = 4 + end if + if (info /= 0) then + call xerbla('CCHDEX',info) + end if + +c setup the new matrix R1 + if (j > 1) then + call clacpy('0',n-1,j-1,R(1,1),n,R1(1,1),n-1) + end if + if (j < n) then + call clacpy('0',n-1,n-j,R(1,j+1),n,R1(1,j),n-1) + call cqhqr(0,n-j,n-j,Qdum,1,R1(j,j),n-1) +c eliminate R(n,n) + call clartg(R1(n-1,n-1),R(n,n),c,s,rr) + R1(n-1,n-1) = rr + endif + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/cchinx.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/cchinx.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,109 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . 
+c + + subroutine cchinx(n,R,R1,j,u,info) +c purpose: given an upper triangular matrix R that is a Cholesky +c factor of a hermitian positive definite matrix A, i.e. +c A = R'*R, this subroutine updates R -> R1 so that +c R1'*R1 = A1, A1(jj,jj) = A, A(j,:) = u', A(:,j) = u, +c jj = [1:j-1,j+1:n+1]. +c (complex version) +c arguments: +c n (in) the order of matrix R +c R (in) the original upper trapezoidal matrix R +c R1 (out) the updated matrix R1 +c j (in) the position of the inserted row/column +c u (in) the vector (n+1) determining the rank-1 update +c info (out) on exit, if info = 1, the update destroyed positive +c definiteness. + + integer n,j,info + complex R(n,n),R1(n+1,n+1),u(n+1) + real rho,scnrm2 + complex Qdum,w + external xerbla,ccopy,clacpy,ctrsv,scnrm2,cqrqhu + integer jj + +c quick return if possible + if (n == 0) then + if (real(u(1)) <= 0) then + info = 1 + return + else + R(1,1) = sqrt(real(u(1))) + end if + end if + +c check arguments + info = 0 + if (n < 0) then + info = 1 + else if (j < 1 .or. 
j > n+1) then + info = 4 + end if + if (info /= 0) then + call xerbla('CCHINX',info) + end if + +c copy shifted vector + if (j > 1) then + call ccopy(j-1,u,1,R1(1,j),1) + end if + w = u(j) + if (j < n+1) then + call ccopy(n-j+1,u(j+1),1,R1(j,j),1) + end if + +c check for singularity of R + do i = 1,n + if (R(i,i) == 0e0) then + info = 2 + return + end if + end do +c form R' \ u + call ctrsv('U','T','N',n,R,n,R1(1,j),1) + rho = scnrm2(n,R1(1,j),1) +c check positive definiteness + rho = u(j) - rho**2 + if (rho <= 0e0) then + info = 1 + return + end if + R1(n+1,n+1) = sqrt(rho) + +c setup the new matrix R1 + do i = 1,n+1 + R1(n+1,i) = 0e0 + end do + if (j > 1) then + call clacpy('0',j-1,n,R(1,1),n,R1(1,1),n+1) + end if + if (j <= n) then + call clacpy('0',n,n-j+1,R(1,j),n,R1(1,j+1),n+1) +c retriangularize + jj = min(j+1,n) + call cqrqhu(0,n+1-j,n-j,Qdum,1,R1(j,jj),n+1,R1(j,j),w) + R1(j,j) = w + do jj = j+1,n + R1(jj,j) = 0e0 + end do + end if + + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/cqhqr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/cqhqr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,69 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . 
+c + subroutine cqhqr(m,n,k,Q,ldq,R,ldr) +c purpose: given a k-by-n upper Hessenberg matrix R and +c an m-by-k matrix Q, this subroutine updates +c R -> R1 and Q -> Q1 so that R1 is upper +c trapezoidal, R1 = G*R and Q1 = Q*G', where +c G is a unitary matrix, giving Q1*R1 = Q*R. +c (complex version) +c arguments: +c m (in) number of rows of the matrix Q +c n (in) number of columns of the matrix R +c k (in) number of columns of Q and rows of R. +c Q (io) on entry, the unitary matrix Q +c on exit, the updated matrix Q1 +c ldq (in) leading dimension of Q +c R (io) on entry, the upper Hessenberg matrix R +c on exit, the updated upper trapezoidal matrix R1 +c ldr (in) leading dimension of R +c + integer m,n,k,ldq,ldr + complex Q(ldq,*),R(ldr,*) + real c + complex s,rr + external xerbla,clartg,crot + integer info,i +c quick return if possible. + if (n <= 0 .or. k <= 1) return +c check arguments. + info = 0 + if (ldq < 1) then + info = 5 + else if (ldr < 1) then + info = 7 + end if + if (info /= 0) then + call xerbla('CQHQR',info) + end if +c triangularize + do i = 1,min(k-1,n) + call clartg(R(i,i),R(i+1,i),c,s,rr) + R(i,i) = rr + R(i+1,i) = 0e0 + if (i < n) then + call crot(n-i,R(i,i+1),ldr,R(i+1,i+1),ldr,c,s) + end if +c apply rotation to Q + if (m > 0) then + call crot(m,Q(1,i),1,Q(1,i+1),1,c,conjg(s)) + end if + end do + end + diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/cqr1up.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/cqr1up.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,53 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. 
+c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine cqr1up(m,n,k,Q,R,u,v) +c purpose: updates a QR factorization after rank-1 modification +c i.e., given a m-by-k unitary Q and m-by-n upper +c trapezoidal R, an m-vector u and n-vector v, +c this subroutine updates Q -> Q1 and R -> R1 so that +c Q1*R1 = Q*R + Q*Q'u*v', and Q1 is again unitary +c and R1 upper trapezoidal. +c (complex version) +c arguments: +c m (in) number of rows of the matrix Q. +c n (in) number of columns of the matrix R. +c k (in) number of columns of Q, and rows of R. k <= m. +c Q (io) on entry, the unitary m-by-k matrix Q. +c on exit, the updated matrix Q1. +c R (io) on entry, the upper trapezoidal m-by-n matrix R. +c on exit, the updated matrix R1. +c u (in) the left m-vector. +c v (in) the right n-vector. +c + integer m,n,k + complex Q(m,k),R(k,n),u(m),v(n) + complex w + external cqrqhv,cqhqr + integer i +c quick return if possible + if (m <= 0 .or. 
n <= 0) return +c eliminate tail of Q'*u + call cqrqhv(m,n,k,Q,m,R,m,u,w) +c update R + do i = 1,n + R(1,i) = R(1,i) + w*conjg(v(i)) + end do +c retriangularize R + call cqhqr(m,n,k,Q,m,R,k) + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/cqrdec.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/cqrdec.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,66 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine cqrdec(m,n,k,Q,R,R1,j) +c purpose: updates a QR factorization after deleting +c a column. +c i.e., given an m-by-k unitary matrix Q, an k-by-n +c upper trapezoidal matrix R and index j in the range +c 1:n+1, this subroutine updates the matrix Q -> Q1 and +c forms an m-by-(n-1) matrix R1 so that Q1 remains +c unitary, R1 is upper trapezoidal, and +c Q1*R1 = [A(:,1:j-1) A(:,j+1:n)], where A = Q*R. +c (complex version) +c arguments: +c m (in) number of rows of the matrix Q. +c n (in) number of columns of the matrix R. +c k (in) number of columns of Q, and rows of R. +c Q (io) on entry, the unitary m-by-k matrix Q. +c on exit, the updated matrix Q1. +c R (in) the original upper trapezoidal matrix R. +c R1 (out) the updated matrix R1. +c j (in) the position of the deleted column in R. +c 1 <= j <= n. 
+c + integer m,n,k,j + complex Q(m,k),R(k,n),R1(k,n-1) + external xerbla,ccopy,cqhqr + integer info +c quick return if possible + if (m <= 0 .or. k <= 0 .or. n == 1) return +c check arguments + info = 0 + if (n < 1) then + info = 2 + else if (j < 1 .or. j > n) then + info = 7 + end if + if (info /= 0) then + call xerbla('CQRDEC',info) + end if +c copy leading portion + call ccopy(k*(j-1),R,1,R1,1) + if (j < n) then +c copy trailing portion of R + call ccopy(k*(n-j),R(1,j+1),1,R1(1,j),1) +c if necessary, retriangularize R1(j:k,j:n-1) and update Q(:,j:k) + if (j < k) then + call cqhqr(m,n-j,k-j+1,Q(1,j),m,R1(j,j),k) + end if + end if + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/cqrder.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/cqrder.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,93 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine cqrder(m,n,Q,Q1,R,R1,j) +c purpose: updates a QR factorization after deleting a row. +c i.e., given an m-by-m unitary matrix Q, an m-by-n +c upper trapezoidal matrix R and index j in the range +c 1:m, this subroutine forms the (m-1)-by-(m-1) matrix +c Q1 and an (m-1)-by-n matrix R1 so that Q1 is again +c unitary, R1 upper trapezoidal, and +c Q1*R1 = [A(1:j-1,:); A(j+1:m,:)], where A = Q*R. 
+c (complex version) +c +c arguments: +c m (in) number of rows of the matrix R. +c n (in) number of columns of the matrix R +c Q (in) the unitary matrix Q +c Q1 (out) the updated matrix Q1 +c R (in) the upper trapezoidal matrix R +c R1 (out) the updated matrix R1 +c j (in) the position of the deleted row +c + integer m,n,j + complex Q(m,m),Q1(m-1,m-1),R(m,n),R1(m-1,n) + real c + complex s,rr,w + external xerbla,clacpy,clartg,crot,csscal,caxpy + integer i +c quick return if possible + if (m == 1) return +c check arguments + info = 0 + if (m < 1) then + info = 1 + else if (j < 1 .or. j > m) then + info = 7 + end if + if (info /= 0) then + call xerbla('CQRDER',info) + end if +c setup the new matrix Q1 +c permute the columns of Q and rows of R so that the deleted row ends +c up being the topmost row. + if (j > 1) then + call clacpy('0',j-1,m-1,Q(1,2),m,Q1(1,1),m-1) + end if + if (j < m) then + call clacpy('0',m-j,m-1,Q(j+1,2),m,Q1(j,1),m-1) + end if +c setup the new matrix R1 + call clacpy('0',m-1,n,R(2,1),m,R1(1,1),m-1) +c eliminate Q(j,2:m) + w = Q(j,m) + do i = m-1,2,-1 + call clartg(Q(j,i),w,c,s,rr) + w = rr +c apply rotation to rows of R1 + if (i <= n) then + call crot(n-i+1,R1(i-1,i),m-1,R1(i,i),m-1,c,conjg(s)) + end if +c apply rotation to columns of Q1 + call crot(m-1,Q1(1,i-1),1,Q1(1,i),1,c,s) + end do +c the last iteration is special, as we don't have the first row of +c R and first column of Q + call clartg(Q(j,1),w,c,s,rr) + w = rr + call csscal(n,c,R1(1,1),m-1) + call caxpy(n,-s,R(1,1),m,R1(1,1),m-1) +c apply rotation to columns of Q1 + call csscal(m-1,c,Q1(1,1),1) + if (j > 1) then + call caxpy(j-1,-conjg(s),Q(1,1),1,Q1(1,1),1) + end if + if (j < m) then + call caxpy(m-j,-conjg(s),Q(j+1,1),1,Q1(j,1),1) + end if + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/cqrinc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/cqrinc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,74 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech
Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine cqrinc(m,n,k,Q,R,R1,j,x) +c purpose: updates a QR factorization after inserting a new +c column. +c i.e., given an m-by-k unitary matrix Q, an m-by-n +c upper trapezoidal matrix R and index j in the range +c 1:n+1, this subroutine updates the matrix Q -> Q1 and +c forms an m-by-(n+1) matrix R1 so that Q1 is again unitary, +c R1 upper trapezoidal, and +c Q1*R1 = [A(:,1:j-1); Q*Q'*x; A(:,j:n-1)], where A = Q*R. +c (complex version) +c arguments: +c m (in) number of rows of the matrix Q. +c n (in) number of columns of the matrix R. +c k (in) number of columns of Q, and rows of R. k <= m. +c Q (io) on entry, the unitary matrix Q. +c on exit, the updated matrix Q1 +c R (in) the original upper trapezoidal matrix R +c R1 (out) the updated matrix R1 +c j (in) the position of the new column in R1 +c x (in) the column being inserted +c + integer m,n,k,j + complex Q(m,k),R(k,n),R1(k,n+1),x(m) + complex w + external xerbla,ccopy,cqrqhv,cgemv + integer info,i,jj +c quick return if possible + if (m <= 0) return +c check arguments + info = 0 + if (n < 0) then + info = 2 + else if (j < 1 .or. 
j > n+1) then + info = 6 + end if + if (info /= 0) then + call xerbla('CQRINC',info) + end if +c copy leading portion of R + call ccopy(k*(j-1),R,1,R1,1) + if (j <= n) then + call ccopy(k*(n+1-j),R(1,j),1,R1(1,j+1),1) + end if + call cgemv('C',m,min(k,j-1),cmplx(1e0),Q,m,x,1, + + cmplx(0e0),R1(1,j),1) + if (j < k) then +c eliminate tail, updating Q(:,j:k) and R1(j:k,j+1:n+1) + jj = min(j,n)+1 + call cqrqhv(m,n+1-j,k-j+1,Q(1,j),m,R1(j,jj),m,x,w) +c assemble inserted column + R1(j,j) = w + do i = j+1,k + R1(i,j) = 0e0 + end do + end if + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/cqrinr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/cqrinr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,73 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine cqrinr(m,n,Q,Q1,R,R1,j,x) +c purpose: updates a QR factorization after inserting a new +c row. +c i.e., given an m-by-m unitary matrix Q, an m-by-n +c upper trapezoidal matrix R and index j in the range +c 1:m+1, this subroutine forms the (m+1)-by-(m+1) matrix +c Q1 and an (m+1)-by-n matrix R1 so that Q1 is again +c unitary, R1 upper trapezoidal, and +c Q1*R1 = [A(1:j-1,:); x; A(j:m,:)], where A = Q*R. +c (complex version) +c arguments: +c m (in) number of rows of the matrix R. 
+c n (in) number of columns of the matrix R +c Q (in) the unitary matrix Q +c Q1 (out) the updated matrix Q1 +c R (in) the upper trapezoidal matrix R +c R1 (out) the updated matrix R1 +c j (in) the position of the new row in R1 +c x (in) the row being added +c + integer m,n,j + complex Q(m,m),Q1(m+1,m+1),R(m,n),R1(m+1,n),x(n) + external xerbla,clacpy,ccopy,cqhqr + integer i +c check arguments + info = 0 + if (n < 0) then + info = 2 + else if (j < 1 .or. j > m+1) then + info = 7 + end if + if (info /= 0) then + call xerbla('CQRINR',info) + end if +c setup the new matrix Q1 +c permute the columns of Q1 and rows of R1 so that the new row ends +c up being the topmost row. + if (j > 1) then + call clacpy('0',j-1,m,Q(1,1),m,Q1(1,2),m+1) + end if + if (j <= m) then + call clacpy('0',m-j+1,m,Q(j,1),m,Q1(j+1,2),m+1) + end if +c zero the rest of Q1 + do i = 1,m+1 + Q1(i,1) = 0e0 + Q1(j,i) = 0e0 + end do + Q1(j,1) = 1e0 +c setup the new matrix R1 + call ccopy(n,x,1,R1(1,1),m+1) + call clacpy('0',m,n,R(1,1),m,R1(2,1),m+1) +c rotate to form proper QR + call cqhqr(m+1,n,m+1,Q1,m+1,R1,m+1) + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/cqrqhu.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/cqrqhu.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,78 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details.
+c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine cqrqhu(m,n,k,Q,ldq,R,ldr,u,rr) +c purpose: given an m-by-k matrix Q, an upper trapezoidal +c k-by-n matrix R, and a k-vector u, +c this subroutine updates the matrices Q -> Q1 and +c R -> R1 so that Q1 = Q*G', R1 = G*R, u1(2:k) = 0 +c with G unitary, R1 upper Hessenberg, and u1 = G*u. +c (complex version) +c arguments: +c m (in) number of rows of the matrix Q. +c n (in) number of columns of the matrix R. +c k (in) number of columns of Q and rows of R. +c Q (io) on entry, the unitary matrix Q. +c on exit, the updated matrix Q1. +c ldq (in) leading dimension of Q. +c R (io) on entry, the upper triangular matrix R. +c on exit, the updated upper Hessenberg matrix R1. +c ldr (in) leading dimension of R. +c u (in) the k-vector u. +c rr (out) the first element of Q1'*u on exit. +c +c if Q is unitary, so is Q1. It is not strictly +c necessary, however. + integer m,n,k,ldq,ldr + complex Q(ldq,*),R(ldr,*),u(*),rr + real c + complex s,w + external xerbla,clartg,crot + integer i,info +c quick return if possible. + if (k <= 0) return +c check arguments. 
+ info = 0 + if (ldq < 1) then + info = 5 + else if (ldr < 1) then + info = 7 + end if + if (info /= 0) then + call xerbla('CQRQHU',info) + end if + rr = u(k) + do i = k-1,1,-1 + w = rr + if (w /= cmplx(0e0,0e0)) then + call clartg(u(i),w,c,s,rr) +c apply rotation to rows of R if necessary + if (i <= n) then + call crot(n+1-i,R(i,i),ldr,R(i+1,i),ldr,c,s) + end if +c apply rotation to columns of Q if necessary + if (m > 0) then + call crot(m,Q(1,i),1,Q(1,i+1),1,c,conjg(s)) + end if + else +c no rotation necessary + rr = u(i) + end if + end do + end + diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/cqrqhv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/cqrqhv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,75 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine cqrqhv(m,n,k,Q,ldq,R,ldr,u,rr) +c purpose: given an m-by-k matrix Q, an upper trapezoidal +c k-by-n matrix R, and an m-vector u, this subroutine +c updates the matrices Q -> Q1 and R -> R1 so that +c Q1 = Q*G', R1 = G*R, w1(2:m) = 0 with G unitary, +c R1 upper Hessenberg, and w1 = Q1'*u. +c (complex version) +c arguments: +c m (in) number of rows of the matrix Q. +c n (in) number of columns of the matrix R. +c k (in) number of columns of Q and rows of R. k <= m. +c Q (io) on entry, the unitary matrix Q. 
+c on exit, the updated matrix Q1. +c ldq (in) leading dimension of Q. +c R (io) on entry, the upper triangular matrix R. +c on exit, the updated upper Hessenberg matrix R1. +c ldr (in) leading dimension of R. +c u (in) the m-vector u. +c rr (out) the first element of Q1'*u on exit. +c +c if Q is unitary, so is Q1. It is not strictly +c necessary, however. + integer m,n,k,ldq,ldr + complex Q(ldq,*),R(ldr,*),u(*),rr + real c + complex s,w,w1,cdotc + external xerbla,cdotc,clartg,crot + integer i,info +c quick return if possible. + if (k <= 0) return +c check arguments. + info = 0 + if (k > m) then + info = 3 + else if (ldq < 1) then + info = 5 + else if (ldr < 1) then + info = 7 + end if + if (info /= 0) then + call xerbla('CQRQHV',info) + end if +c form each element of w = Q'*u when necessary. + rr = cdotc(m,Q(1,k),1,u,1) + do i = k-1,1,-1 + w1 = rr + w = cdotc(m,Q(1,i),1,u,1) + call clartg(w,w1,c,s,rr) +c apply rotation to rows of R if necessary + if (i <= n) then + call crot(n+1-i,R(i,i),ldr,R(i+1,i),ldr,c,s) + end if +c apply rotation to columns of Q + call crot(m,Q(1,i),1,Q(1,i+1),1,c,conjg(s)) + end do + end + diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/cqrshc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/cqrshc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,97 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. 
+c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine cqrshc(m,n,k,Q,R,i,j) +c purpose: updates a QR factorization after circular shift of +c columns. +c i.e., given an m-by-k unitary matrix Q, an k-by-n +c upper trapezoidal matrix R and index j in the range +c 1:n, this subroutine updates the matrix Q -> Q1 and +c R -> R1 so that Q1 is again unitary, R1 upper trapezoidal, +c and +c Q1*R1 = A(:,p), where A = Q*R and p is the permutation +c [1:i-1,shift(i:j,-1),j+1:n] if i < j or +c [1:j-1,shift(j:i,+1),i+1:n] if j < i. +c if m == 0, the matrix Q is ignored. +c (complex version) +c arguments: +c m (in) number of rows of the matrix Q, or 0 if Q is not needed. +c n (in) number of columns of the matrix R. +c k (in) number of columns of Q, and rows of R. +c Q (io) on entry, the (unitary) matrix Q. +c on exit, the updated matrix Q1 +c R (io) on entry, the upper trapezoidal k-by-n matrix R. +c on exit, the updated matrix R1. +c i (in) the first index determining the range (see above) +c j (in) the second index determining the range (see above) +c + integer m,n,k,i,j + complex Q(m,k),R(k,n) + external xerbla,cswap,cqhqr,cqrqhu + complex w + integer l,jj,kk,info + +c quick return if possible + if (k <= 0 .or. n <= 1) return + info = 0 + if (m /= 0 .and. k > m) then + info = 3 + else if (i < 1 .or. i > n) then + info = 6 + else if (j < 1 .or.
j > n) then + info = 7 + end if + if (info /= 0) then + call xerbla('CQRSHC',info) + end if + + if (i < j) then +c shift columns + do l = i,j-1 + call cswap(min(k,l+1),R(1,l),1,R(1,l+1),1) + end do +c retriangularize + if (i < k) then + kk = min(k,j) + if (m > 0) then + call cqhqr(m,n+1-i,kk+1-i,Q(1,i),m,R(i,i),k) + else + call cqhqr(0,n+1-i,kk+1-i,Q,1,R(i,i),k) + endif + end if + else if (j < i) then +c shift columns + do l = i,j+1,-1 + call cswap(min(k,i),R(1,l),1,R(1,l-1),1) + end do +c retriangularize + if (j < k) then + jj = min(j+1,n) + kk = min(k,i) + if (m > 0) then + call cqrqhu(m,n-j,kk+1-j,Q(1,j),m,R(j,jj),k,R(j,j),w) + else + call cqrqhu(0,n-j,kk+1-j,Q,1,R(j,jj),k,R(j,j),w) + end if + R(j,j) = w + do jj = j+1,kk + R(jj,j) = 0 + end do + end if + end if + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/sch1dn.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/sch1dn.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,81 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine sch1dn(n,R,u,w,info) +c purpose: given an upper triangular matrix R that is a Cholesky +c factor of a symmetric positive definite matrix A, i.e. 
+c A = R'*R, this subroutine downdates R -> R1 so that +c R1'*R1 = A - u*u' +c (real version) +c arguments: +c n (in) the order of matrix R +c R (io) on entry, the upper triangular matrix R +c on exit, the updated matrix R1 +c u (io) the vector determining the rank-1 update +c on exit, u is destroyed. +c w (w) a workspace vector of size n +c +c NOTE: the workspace vector is used to store the rotations +c so that R does not need to be traversed by rows. +c + integer n,info + real R(n,n),u(n) + real w(n) + external strsv,slartg,snrm2 + real rho,snrm2 + real rr,ui,t + integer i,j + +c quick return if possible + if (n <= 0) return +c check for singularity of R + do i = 1,n + if (R(i,i) == 0e0) then + info = 2 + return + end if + end do +c form R' \ u + call strsv('U','T','N',n,R,n,u,1) + rho = snrm2(n,u,1) +c check positive definiteness + rho = 1 - rho**2 + if (rho <= 0e0) then + info = 1 + return + end if + rho = sqrt(rho) +c eliminate R' \ u + do i = n,1,-1 + ui = u(i) +c generate next rotation + call slartg(rho,ui,w(i),u(i),rr) + rho = rr + end do +c apply rotations + do i = n,1,-1 + ui = 0e0 + do j = i,1,-1 + t = w(j)*ui + u(j)*R(j,i) + R(j,i) = w(j)*R(j,i) - u(j)*ui + ui = t + end do + end do + + info = 0 + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/sch1up.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/sch1up.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,57 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + + subroutine sch1up(n,R,u,w) +c purpose: given an upper triangular matrix R that is a Cholesky +c factor of a symmetric positive definite matrix A, i.e. +c A = R'*R, this subroutine updates R -> R1 so that +c R1'*R1 = A + u*u' +c (real version) +c arguments: +c n (in) the order of matrix R +c R (io) on entry, the upper triangular matrix R +c on exit, the updated matrix R1 +c u (io) the vector determining the rank-1 update +c on exit, u is destroyed. +c w (w) a workspace vector of size n +c +c NOTE: the workspace vector is used to store the rotations +c so that R does not need to be traversed by rows. +c + integer n + real R(n,n),u(n) + real w(n) + external slartg + real rr,ui,t + integer i,j + + do i = 1,n +c apply stored rotations, column-wise + ui = u(i) + do j = 1,i-1 + t = w(j)*R(j,i) + u(j)*ui + ui = w(j)*ui - u(j)*R(j,i) + R(j,i) = t + end do +c generate next rotation + call slartg(R(i,i),ui,w(i),u(i),rr) + R(i,i) = rr + end do + end + diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/schdex.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/schdex.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,61 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. 
+c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + + subroutine schdex(n,R,R1,j) +c purpose: given an upper triangular matrix R that is a Cholesky +c factor of a symmetric positive definite matrix A, i.e. +c A = R'*R, this subroutine updates R -> R1 so that +c R1'*R1 = A(jj,jj), where jj = [1:j-1,j+1:n]. +c (real version) +c arguments: +c n (in) the order of matrix R +c R (in) the original upper trapezoidal matrix R +c R1 (out) the updated matrix R1 +c j (in) the position of the deleted row/column + integer n,j,info + real R(n,n),R1(n-1,n-1) + real Qdum,c,s,rr + external xerbla,slacpy,sqhqr,slartg + +c quick return if possible + if (n == 1) return + +c check arguments + info = 0 + if (n <= 0) then + info = 1 + else if (j < 1 .or. j > n) then + info = 4 + end if + if (info /= 0) then + call xerbla('SCHDEX',info) + end if + +c setup the new matrix R1 + if (j > 1) then + call slacpy('0',n-1,j-1,R(1,1),n,R1(1,1),n-1) + end if + if (j < n) then + call slacpy('0',n-1,n-j,R(1,j+1),n,R1(1,j),n-1) + call sqhqr(0,n-j,n-j,Qdum,1,R1(j,j),n-1) +c eliminate R(n,n) + call slartg(R1(n-1,n-1),R(n,n),c,s,rr) + R1(n-1,n-1) = rr + endif + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/schinx.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/schinx.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,108 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + + subroutine schinx(n,R,R1,j,u,info) +c purpose: given an upper triangular matrix R that is a Cholesky +c factor of a symmetric positive definite matrix A, i.e. +c A = R'*R, this subroutine updates R -> R1 so that +c R1'*R1 = A1, A1(jj,jj) = A, A1(j,:) = u', A1(:,j) = u, +c jj = [1:j-1,j+1:n+1]. +c (real version) +c arguments: +c n (in) the order of matrix R +c R (in) the original upper trapezoidal matrix R +c R1 (out) the updated matrix R1 +c j (in) the position of the inserted row/column +c u (in) the vector (n+1) determining the rank-1 update +c info (out) on exit, if info = 1, the update violates positive +c definiteness. + + integer n,j,info + real R(n,n),R1(n+1,n+1),u(n+1) + real rho,Qdum,w,snrm2 + external xerbla,scopy,slacpy,strsv,snrm2,sqrqhu + integer i,jj + +c quick return if possible + if (n == 0) then + if (u(1) <= 0) then + info = 1 + return + else + R1(1,1) = sqrt(u(1)) + end if + end if + +c check arguments + info = 0 + if (n < 0) then + info = 1 + else if (j < 1 .or.
j > n+1) then + info = 4 + end if + if (info /= 0) then + call xerbla('SCHINX',info) + end if + +c copy shifted vector + if (j > 1) then + call scopy(j-1,u,1,R1(1,j),1) + end if + w = u(j) + if (j < n+1) then + call scopy(n-j+1,u(j+1),1,R1(j,j),1) + end if + +c check for singularity of R + do i = 1,n + if (R(i,i) == 0e0) then + info = 2 + return + end if + end do +c form R' \ u + call strsv('U','T','N',n,R,n,R1(1,j),1) + rho = snrm2(n,R1(1,j),1) +c check positive definiteness + rho = u(j) - rho**2 + if (rho <= 0e0) then + info = 1 + return + end if + R1(n+1,n+1) = sqrt(rho) + +c setup the new matrix R1 + do i = 1,n + R1(n+1,i) = 0e0 + end do + if (j > 1) then + call slacpy('0',n,j-1,R(1,1),n,R1(1,1),n+1) + end if + if (j <= n) then + call slacpy('0',n,n-j+1,R(1,j),n,R1(1,j+1),n+1) +c retriangularize + jj = min(j+1,n) + call sqrqhu(0,n+1-j,n-j,Qdum,1,R1(j,jj),n+1,R1(j,j),w) + R1(j,j) = w + do jj = j+1,n + R1(jj,j) = 0e0 + end do + end if + + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/sqhqr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/sqhqr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,69 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c .
+c + subroutine sqhqr(m,n,k,Q,ldq,R,ldr) +c purpose: given an k-by-n upper Hessenberg matrix R and +c an m-by-k matrix Q, this subroutine updates +c R -> R1 and Q -> Q1 so that R1 is upper +c trapezoidal, R1 = G*R and Q1 = Q*G', where +c G is an orthogonal matrix, giving Q1*R1 = Q*R. +c (real version) +c arguments: +c m (in) number of rows of the matrix Q +c n (in) number of columns of the matrix R +c k (in) number of columns of Q and rows of R. +c Q (io) on entry, the orthogonal matrix Q +c on exit, the updated matrix Q1 +c ldq (in) leading dimension of Q +c R (io) on entry, the upper triangular matrix R +c on exit, the updated upper Hessenberg matrix R1 +c ldr (in) leading dimension of R +c + integer m,n,k,ldq,ldr + real Q(ldq,*),R(ldr,*) + real c + real s,rr + external xerbla,slartg,srot + integer info,i +c quick return if possible. + if (n <= 0 .or. k <= 1) return +c check arguments. + info = 0 + if (ldq < 1) then + info = 5 + else if (ldr < 1) then + info = 7 + end if + if (info /= 0) then + call xerbla('SQHQR',info) + end if +c triangularize + do i = 1,min(k-1,n) + call slartg(R(i,i),R(i+1,i),c,s,rr) + R(i,i) = rr + R(i+1,i) = 0e0 + if (i < n) then + call srot(n-i,R(i,i+1),ldr,R(i+1,i+1),ldr,c,s) + end if +c apply rotation to Q + if (m > 0) then + call srot(m,Q(1,i),1,Q(1,i+1),1,c,s) + end if + end do + end + diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/sqr1up.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/sqr1up.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,52 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. 
+c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine sqr1up(m,n,k,Q,R,u,v) +c purpose: updates a QR factorization after rank-1 modification +c i.e., given a m-by-k orthogonal Q and k-by-n upper +c trapezoidal R, an m-vector u and n-vector v, +c this subroutine updates Q -> Q1 and R -> R1 so that +c Q1*R1 = Q*R + Q*Q'u*v', and Q1 is again orthonormal +c and R1 upper trapezoidal. +c (real version) +c arguments: +c m (in) number of rows of the matrix Q. +c n (in) number of columns of the matrix R. +c k (in) number of columns of Q, and rows of R. k <= m. +c Q (io) on entry, the orthogonal m-by-k matrix Q. +c on exit, the updated matrix Q1. +c R (io) on entry, the upper trapezoidal k-by-n matrix R. +c on exit, the updated matrix R1. +c u (in) the left m-vector. +c v (in) the right n-vector. +c + integer m,n,k + real Q(m,k),R(k,n),u(m),v(n) + real w + external sqrqhv,sqhqr,saxpy +c quick return if possible + if (m <= 0 .or. n <= 0) return +c eliminate tail of Q'*u + call sqrqhv(m,n,k,Q,m,R,k,u,w) +c update R + + call saxpy(n,w,v,1,R(1,1),k) + +c retriangularize R + call sqhqr(m,n,k,Q,m,R,k) + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/sqrdec.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/sqrdec.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,66 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version.
+c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine sqrdec(m,n,k,Q,R,R1,j) +c purpose: updates a QR factorization after deleting +c a column. +c i.e., given an m-by-k orthogonal matrix Q, an k-by-n +c upper trapezoidal matrix R and index j in the range +c 1:n+1, this subroutine updates the matrix Q -> Q1 and +c forms an m-by-(n-1) matrix R1 so that Q1 remains +c orthogonal, R1 is upper trapezoidal, and +c Q1*R1 = [A(:,1:j-1) A(:,j+1:n)], where A = Q*R. +c (real version) +c arguments: +c m (in) number of rows of the matrix Q. +c n (in) number of columns of the matrix R. +c k (in) number of columns of Q, and rows of R. +c Q (io) on entry, the orthogonal m-by-k matrix Q. +c on exit, the updated matrix Q1. +c R (in) the original upper trapezoidal matrix R. +c R1 (out) the updated matrix R1. +c j (in) the position of the deleted column in R. +c 1 <= j <= n. +c + integer m,n,k,j + real Q(m,k),R(k,n),R1(k,n-1) + external xerbla,scopy,sqhqr + integer info +c quick return if possible + if (m <= 0 .or. k <= 0 .or. n == 1) return +c check arguments + info = 0 + if (n < 1) then + info = 2 + else if (j < 1 .or. 
j > n) then + info = 7 + end if + if (info /= 0) then + call xerbla('SQRDEC',info) + end if +c copy leading portion + call scopy(k*(j-1),R,1,R1,1) + if (j < n) then +c copy trailing portion of R + call scopy(k*(n-j),R(1,j+1),1,R1(1,j),1) +c if necessary, retriangularize R1(j:k,j:n-1) and update Q(:,j:k) + if (j < k) then + call sqhqr(m,n-j,k-j+1,Q(1,j),m,R1(j,j),k) + end if + end if + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/sqrder.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/sqrder.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,93 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine sqrder(m,n,Q,Q1,R,R1,j) +c purpose: updates a QR factorization after deleting a row. +c i.e., given an m-by-m orthogonal matrix Q, an m-by-n +c upper trapezoidal matrix R and index j in the range +c 1:m, this subroutine forms the (m-1)-by-(m-1) matrix +c Q1 and an (m-1)-by-n matrix R1 so that Q1 is again +c orthogonal, R1 upper trapezoidal, and +c Q1*R1 = [A(1:j-1,:); A(j+1:m,:)], where A = Q*R. +c (real version) +c +c arguments: +c m (in) number of rows of the matrix R. 
+c n (in) number of columns of the matrix R +c Q (in) the orthogonal matrix Q +c Q1 (out) the updated matrix Q1 +c R (in) the upper trapezoidal matrix R +c R1 (out) the updated matrix R1 +c j (in) the position of the deleted row +c + integer m,n,j + real Q(m,m),Q1(m-1,m-1),R(m,n),R1(m-1,n) + real c + real s,rr,w + external xerbla,slacpy,slartg,srot,sscal,saxpy + integer i +c quick return if possible + if (m == 1) return +c check arguments + info = 0 + if (m < 1) then + info = 1 + else if (j < 1 .or. j > n) then + info = 7 + end if + if (info /= 0) then + call xerbla('SQRDER',info) + end if +c setup the new matrix Q1 +c permute the columns of Q and rows of R so that the deleted row ends +c up being the topmost row. + if (j > 1) then + call slacpy('0',j-1,m-1,Q(1,2),m,Q1(1,1),m-1) + end if + if (j < m) then + call slacpy('0',m-j,m-1,Q(j+1,2),m,Q1(j,1),m-1) + end if +c setup the new matrix R1 + call slacpy('0',m-1,n,R(2,1),m,R1(1,1),m-1) +c eliminate Q(j,2:m) + w = Q(j,m) + do i = m-1,2,-1 + call slartg(Q(j,i),w,c,s,rr) + w = rr +c apply rotation to rows of R1 + if (i <= n) then + call srot(n-i+1,R1(i-1,i),m-1,R1(i,i),m-1,c,s) + end if +c apply rotation to columns of Q1 + call srot(m-1,Q1(1,i-1),1,Q1(1,i),1,c,s) + end do +c the last iteration is special, as we don't have the first row of +c R and first column of Q + call slartg(Q(j,1),w,c,s,rr) + w = rr + call sscal(n,c,R1(1,1),m-1) + call saxpy(n,-s,R(1,1),m,R1(1,1),m-1) +c apply rotation to columns of Q1 + call sscal(m-1,c,Q1(1,1),1) + if (j > 1) then + call saxpy(j-1,-s,Q(1,1),1,Q1(1,1),1) + end if + if (j < m) then + call saxpy(m-j,-s,Q(j+1,1),1,Q1(j,1),1) + end if + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/sqrinc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/sqrinc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,75 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or 
modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine sqrinc(m,n,k,Q,R,R1,j,x) +c purpose: updates a QR factorization after inserting a new +c column. +c i.e., given an m-by-k orthogonal matrix Q, an m-by-n +c upper trapezoidal matrix R and index j in the range +c 1:n+1, this subroutine updates the matrix Q -> Q1 and +c forms an m-by-(n+1) matrix R1 so that Q1 is again +c orthogonal, R1 upper trapezoidal, and +c Q1*R1 = [A(:,1:j-1); Q*Q'*x; A(:,j:n-1)], where A = Q*R. +c (real version) +c arguments: +c m (in) number of rows of the matrix Q. +c n (in) number of columns of the matrix R. +c k (in) number of columns of Q, and rows of R. k <= m. +c Q (io) on entry, the orthogonal matrix Q. +c on exit, the updated matrix Q1 +c R (in) the original upper trapezoidal matrix R +c R1 (out) the updated matrix R1 +c j (in) the position of the new column in R1 +c x (in) the column being inserted +c + integer m,n,k,j + real Q(m,k),R(k,n),R1(k,n+1),x(m) + + + real w + external xerbla,scopy,sqrqhv,sgemv + integer info,i,jj +c quick return if possible + if (m <= 0) return +c check arguments + info = 0 + if (n < 0) then + info = 2 + else if (j < 1 .or. 
j > n+1) then + info = 6 + end if + if (info /= 0) then + call xerbla('SQRINC',info) + end if +c copy leading portion of R + call scopy(k*(j-1),R,1,R1,1) + if (j <= n) then + call scopy(k*(n+1-j),R(1,j),1,R1(1,j+1),1) + end if + call sgemv('T',m,min(k,j-1),1e0,Q,m,x,1,0e0,R1(1,j),1) + if (j < k) then +c eliminate tail, updating Q(:,j:k) and R1(j:k,j+1:n+1) + jj = min(j,n)+1 + call sqrqhv(m,n+1-j,k-j+1,Q(1,j),m,R1(j,jj),m,x,w) +c assemble inserted column + R1(j,j) = w + do i = j+1,k + R1(i,j) = 0e0 + end do + end if + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/sqrinr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/sqrinr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,73 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine sqrinr(m,n,Q,Q1,R,R1,j,x) +c purpose: updates a QR factorization after inserting a new +c row. +c i.e., given an m-by-m orthogonal matrix Q, an m-by-n +c upper trapezoidal matrix R and index j in the range +c 1:m+1, this subroutine forms the (m+1)-by-(m+1) matrix +c Q1 and an (m+1)-by-n matrix R1 so that Q1 is again +c orthogonal, R1 upper trapezoidal, and +c Q1*R1 = [A(1:j-1,:); x; A(j:m,:)], where A = Q*R. +c (real version) +c arguments: +c m (in) number of rows of the matrix R. 
+c n (in) number of columns of the matrix R +c Q (in) the orthogonal matrix Q +c Q1 (out) the updated matrix Q1 +c R (in) the upper trapezoidal matrix R +c R1 (out) the updated matrix R1 +c j (in) the position of the new row in R1 +c x (in) the row being added +c + integer m,n,j + real Q(m,m),Q1(m+1,m+1),R(m,n),R1(m+1,n),x(n) + external xerbla,slacpy,scopy,sqhqr + integer i +c check arguments + info = 0 + if (n < 0) then + info = 2 + else if (j < 1 .or. j > m+1) then + info = 7 + end if + if (info /= 0) then + call xerbla('SQRINR',info) + end if +c setup the new matrix Q1 +c permute the columns of Q1 and rows of R1 so that the new row ends +c up being the topmost row. + if (j > 1) then + call slacpy('0',j-1,m,Q(1,1),m,Q1(1,2),m+1) + end if + if (j <= m) then + call slacpy('0',m-j+1,m,Q(j,1),m,Q1(j+1,2),m+1) + end if +c zero the rest of Q1 + do i = 1,m+1 + Q1(i,1) = 0e0 + Q1(j,i) = 0e0 + end do + Q1(j,1) = 1e0 +c setup the new matrix R1 + call scopy(n,x,1,R1(1,1),m+1) + call slacpy('0',m,n,R(1,1),m,R1(2,1),m+1) +c rotate to form proper QR + call sqhqr(m+1,n,m+1,Q1,m+1,R1,m+1) + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/sqrqhu.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/sqrqhu.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,78 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. 
If not, see +c . +c + subroutine sqrqhu(m,n,k,Q,ldq,R,ldr,u,rr) +c purpose: given an m-by-k matrix Q, an upper trapezoidal +c k-by-n matrix R, and a k-vector u, +c this subroutine updates the matrices Q -> Q1 and +c R -> R1 so that Q1 = Q*G', R1 = G*R, u1(2:k) = 0 +c with G orthogonal, R1 upper Hessenberg, and u1 = G*u. +c (real version) +c arguments: +c m (in) number of rows of the matrix Q. +c n (in) number of columns of the matrix R. +c k (in) number of columns of Q and rows of R. +c Q (io) on entry, the orthogonal matrix Q. +c on exit, the updated matrix Q1. +c ldq (in) leading dimension of Q. +c R (io) on entry, the upper triangular matrix R. +c on exit, the updated upper Hessenberg matrix R1. +c ldr (in) leading dimension of R. +c u (in) the k-vector u. +c rr (out) the first element of Q1'*u on exit. +c +c if Q is orthogonal, so is Q1. It is not strictly +c necessary, however. + integer m,n,k,ldq,ldr + real Q(ldq,*),R(ldr,*),u(*),rr + real c + real s,w + external xerbla,slartg,srot + integer i,info +c quick return if possible. + if (k <= 0) return +c check arguments. 
+ info = 0 + if (ldq < 1) then + info = 5 + else if (ldr < 1) then + info = 7 + end if + if (info /= 0) then + call xerbla('SQRQHU',info) + end if + rr = u(k) + do i = k-1,1,-1 + w = rr + if (w /= 0e0) then + call slartg(u(i),w,c,s,rr) +c apply rotation to rows of R if necessary + if (i <= n) then + call srot(n+1-i,R(i,i),ldr,R(i+1,i),ldr,c,s) + end if +c apply rotation to columns of Q if necessary + if (m > 0) then + call srot(m,Q(1,i),1,Q(1,i+1),1,c,s) + end if + else +c no rotation necessary + rr = u(i) + end if + end do + end + diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/sqrqhv.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/sqrqhv.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,75 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine sqrqhv(m,n,k,Q,ldq,R,ldr,u,rr) +c purpose: given an m-by-k matrix Q, an upper trapezoidal +c k-by-n matrix R, and an m-vector u, this subroutine +c updates the matrices Q -> Q1 and R -> R1 so that +c Q1 = Q*G', R1 = G*R, w1(2:m) = 0 with G orthogonal, +c R1 upper Hessenberg, and w1 = Q1'*u. +c (real version) +c arguments: +c m (in) number of rows of the matrix Q. +c n (in) number of columns of the matrix R. +c k (in) number of columns of Q and rows of R. k <= m. +c Q (io) on entry, the orthogonal matrix Q. 
+c on exit, the updated matrix Q1. +c ldq (in) leading dimension of Q. +c R (io) on entry, the upper triangular matrix R. +c on exit, the updated upper Hessenberg matrix R1. +c ldr (in) leading dimension of R. +c u (in) the m-vector u. +c rr (out) the first element of Q1'*u on exit. +c +c if Q is orthogonal, so is Q1. It is not strictly +c necessary, however. + integer m,n,k,ldq,ldr + real Q(ldq,*),R(ldr,*),u(*),rr + real c + real s,w,w1,sdot + external xerbla,sdot,slartg,srot + integer i,info +c quick return if possible. + if (k <= 0) return +c check arguments. + info = 0 + if (k > m) then + info = 3 + else if (ldq < 1) then + info = 5 + else if (ldr < 1) then + info = 7 + end if + if (info /= 0) then + call xerbla('SQRQHV',info) + end if +c form each element of w = Q'*u when necessary. + rr = sdot(m,Q(1,k),1,u,1) + do i = k-1,1,-1 + w1 = rr + w = sdot(m,Q(1,i),1,u,1) + call slartg(w,w1,c,s,rr) +c apply rotation to rows of R if necessary + if (i <= n) then + call srot(n+1-i,R(i,i),ldr,R(i+1,i),ldr,c,s) + end if +c apply rotation to columns of Q + call srot(m,Q(1,i),1,Q(1,i+1),1,c,s) + end do + end + diff -r 45f5faba05a2 -r 82be108cc558 libcruft/qrupdate/sqrshc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/qrupdate/sqrshc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,97 @@ +c Copyright (C) 2008 VZLU Prague, a.s., Czech Republic +c +c Author: Jaroslav Hajek +c +c This source is free software; you can redistribute it and/or modify +c it under the terms of the GNU General Public License as published by +c the Free Software Foundation; either version 2 of the License, or +c (at your option) any later version. +c +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. 
+c +c You should have received a copy of the GNU General Public License +c along with this software; see the file COPYING. If not, see +c . +c + subroutine sqrshc(m,n,k,Q,R,i,j) +c purpose: updates a QR factorization after circular shift of +c columns. +c i.e., given an m-by-k orthogonal matrix Q, an k-by-n +c upper trapezoidal matrix R and index j in the range +c 1:n+1, this subroutine updates the matrix Q -> Q1 and +c R -> R1 so that Q1 is again orthogonal, R1 upper +c trapezoidal, and +c Q1*R1 = A(:,p), where A = Q*R and p is the permutation +c [1:i-1,shift(i:j,-1),j+1:n] if i < j or +c [1:j-1,shift(j:i,+1),i+1:n] if j < i. +c if m == 0, the matrix Q is ignored. +c (real version) +c arguments: +c m (in) number of rows of the matrix Q, or 0 if Q is not needed. +c n (in) number of columns of the matrix R. +c k (in) number of columns of Q, and rows of R. +c Q (io) on entry, the (orthogonal) matrix Q. +c on exit, the updated matrix Q1 +c R (io) on entry, the upper trapezoidal m-by-n matrix R. +c on exit, the updated matrix R1. +c i (in) the first index determining the range (see above) +c j (in) the second index determining the range (see above) +c + integer m,n,k,i,j + real Q(m,k),R(k,n) + external xerbla,sswap,sqhqr,sqrqhu + real w + integer l,jj,kk,info + +c quick return if possible + if (k <= 0 .or. n <= 1) return + info = 0 + if (m /= 0 .and. k > m) then + info = 3 + else if (i < 1 .or. i > n) then + info = 6 + else if (j < 1 .or. 
j > n) then + info = 7 + end if + if (info /= 0) then + call xerbla('SQRSHC',info) + end if + + if (i < j) then +c shift columns + do l = i,j-1 + call sswap(min(k,l+1),R(1,l),1,R(1,l+1),1) + end do +c retriangularize + if (i < k) then + kk = min(k,j) + if (m > 0) then + call sqhqr(m,n+1-i,kk+1-i,Q(1,i),m,R(i,i),k) + else + call sqhqr(0,n+1-i,kk+1-i,Q,1,R(i,i),k) + endif + end if + else if (j < i) then +c shift columns + do l = i,j+1,-1 + call sswap(min(k,i),R(1,l),1,R(1,l-1),1) + end do +c retriangularize + if (j < k) then + jj = min(j+1,n) + kk = min(k,i) + if (m > 0) then + call sqrqhu(m,n-j,kk+1-j,Q(1,j),m,R(j,jj),k,R(j,j),w) + else + call sqrqhu(0,n-j,kk+1-j,Q,1,R(j,jj),k,R(j,j),w) + end if + R(j,j) = w + do jj = j+1,kk + R(jj,j) = 0 + end do + end if + end if + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/Makefile.in --- a/libcruft/slatec-fn/Makefile.in Wed May 14 18:09:56 2008 +0200 +++ b/libcruft/slatec-fn/Makefile.in Sun Apr 27 22:34:17 2008 +0200 @@ -28,10 +28,13 @@ include $(TOPDIR)/Makeconf -FSRC = d9gmit.f d9lgic.f d9lgit.f d9lgmc.f dacosh.f dasinh.f datanh.f \ +FSRC = albeta.f alngam.f alnrel.f algams.f acosh.f asinh.f atanh.f betai.f \ + csevl.f d9gmit.f d9lgic.f d9lgit.f d9lgmc.f dacosh.f dasinh.f datanh.f \ dbetai.f dcsevl.f derf.f derfc.f dgami.f dgamit.f dgamlm.f dgamma.f \ - dgamr.f dlbeta.f dlgams.f dlngam.f dlnrel.f dpchim.f dpchst.f \ - initds.f xdacosh.f xdasinh.f xdatanh.f xdbetai.f xderf.f xderfc.f \ - xdgami.f xdgamit.f xdgamma.f xgmainc.f + dgamr.f dlbeta.f dlgams.f dlngam.f dlnrel.f dpchim.f dpchst.f erf.f erfc.f \ + gami.f gamit.f gamlim.f gamma.f gamr.f initds.f inits.f pchim.f pchst.f \ + r9lgmc.f r9lgit.f r9gmit.f r9lgic.f xdacosh.f xdasinh.f xdatanh.f \ + xdbetai.f xderf.f xderfc.f xdgami.f xdgamit.f xdgamma.f xgmainc.f xacosh.f \ + xasinh.f xatanh.f xerf.f xerfc.f xsgmainc.f xgamma.f xbetai.f include ../Makerules diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/acosh.f --- /dev/null Thu Jan 01 00:00:00 1970 
+0000 +++ b/libcruft/slatec-fn/acosh.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,39 @@ +*DECK ACOSH + FUNCTION ACOSH (X) +C***BEGIN PROLOGUE ACOSH +C***PURPOSE Compute the arc hyperbolic cosine. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C4C +C***TYPE SINGLE PRECISION (ACOSH-S, DACOSH-D, CACOSH-C) +C***KEYWORDS ACOSH, ARC HYPERBOLIC COSINE, ELEMENTARY FUNCTIONS, FNLIB, +C INVERSE HYPERBOLIC COSINE +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C ACOSH(X) computes the arc hyperbolic cosine of X. +C +C***REFERENCES (NONE) +C***ROUTINES CALLED R1MACH, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770401 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C 900326 Removed duplicate information from DESCRIPTION section. +C (WRB) +C***END PROLOGUE ACOSH + SAVE ALN2,XMAX + DATA ALN2 / 0.6931471805 5994530942E0/ + DATA XMAX /0./ +C***FIRST EXECUTABLE STATEMENT ACOSH + IF (XMAX.EQ.0.) XMAX = 1.0/SQRT(R1MACH(3)) +C + IF (X .LT. 1.0) CALL XERMSG ('SLATEC', 'ACOSH', 'X LESS THAN 1', + + 1, 2) +C + IF (X.LT.XMAX) ACOSH = LOG (X + SQRT(X*X-1.0)) + IF (X.GE.XMAX) ACOSH = ALN2 + LOG(X) +C + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/albeta.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/albeta.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,63 @@ +*DECK ALBETA + FUNCTION ALBETA (A, B) +C***BEGIN PROLOGUE ALBETA +C***PURPOSE Compute the natural logarithm of the complete Beta +C function. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C7B +C***TYPE SINGLE PRECISION (ALBETA-S, DLBETA-D, CLBETA-C) +C***KEYWORDS FNLIB, LOGARITHM OF THE COMPLETE BETA FUNCTION, +C SPECIAL FUNCTIONS +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C ALBETA computes the natural log of the complete beta function. 
+C +C Input Parameters: +C A real and positive +C B real and positive +C +C***REFERENCES (NONE) +C***ROUTINES CALLED ALNGAM, ALNREL, GAMMA, R9LGMC, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770701 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C 900326 Removed duplicate information from DESCRIPTION section. +C (WRB) +C 900727 Added EXTERNAL statement. (WRB) +C***END PROLOGUE ALBETA + EXTERNAL GAMMA + SAVE SQ2PIL + DATA SQ2PIL / 0.9189385332 0467274 E0 / +C***FIRST EXECUTABLE STATEMENT ALBETA + P = MIN (A, B) + Q = MAX (A, B) +C + IF (P .LE. 0.0) CALL XERMSG ('SLATEC', 'ALBETA', + + 'BOTH ARGUMENTS MUST BE GT ZERO', 1, 2) + IF (P.GE.10.0) GO TO 30 + IF (Q.GE.10.0) GO TO 20 +C +C P AND Q ARE SMALL. +C + ALBETA = LOG(GAMMA(P) * (GAMMA(Q)/GAMMA(P+Q)) ) + RETURN +C +C P IS SMALL, BUT Q IS BIG. +C + 20 CORR = R9LGMC(Q) - R9LGMC(P+Q) + ALBETA = ALNGAM(P) + CORR + P - P*LOG(P+Q) + + 1 (Q-0.5)*ALNREL(-P/(P+Q)) + RETURN +C +C P AND Q ARE BIG. +C + 30 CORR = R9LGMC(P) + R9LGMC(Q) - R9LGMC(P+Q) + ALBETA = -0.5*LOG(Q) + SQ2PIL + CORR + (P-0.5)*LOG(P/(P+Q)) + 1 + Q*ALNREL(-P/(P+Q)) + RETURN +C + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/algams.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/algams.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,38 @@ +*DECK ALGAMS + SUBROUTINE ALGAMS (X, ALGAM, SGNGAM) +C***BEGIN PROLOGUE ALGAMS +C***PURPOSE Compute the logarithm of the absolute value of the Gamma +C function. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C7A +C***TYPE SINGLE PRECISION (ALGAMS-S, DLGAMS-D) +C***KEYWORDS ABSOLUTE VALUE OF THE LOGARITHM OF THE GAMMA FUNCTION, +C FNLIB, SPECIAL FUNCTIONS +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C Evaluates the logarithm of the absolute value of the gamma +C function. 
+C X - input argument +C ALGAM - result +C SGNGAM - is set to the sign of GAMMA(X) and will +C be returned at +1.0 or -1.0. +C +C***REFERENCES (NONE) +C***ROUTINES CALLED ALNGAM +C***REVISION HISTORY (YYMMDD) +C 770701 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C***END PROLOGUE ALGAMS +C***FIRST EXECUTABLE STATEMENT ALGAMS + ALGAM = ALNGAM(X) + SGNGAM = 1.0 + IF (X.GT.0.0) RETURN +C + INT = MOD (-AINT(X), 2.0) + 0.1 + IF (INT.EQ.0) SGNGAM = -1.0 +C + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/alngam.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/alngam.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,70 @@ +*DECK ALNGAM + FUNCTION ALNGAM (X) +C***BEGIN PROLOGUE ALNGAM +C***PURPOSE Compute the logarithm of the absolute value of the Gamma +C function. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C7A +C***TYPE SINGLE PRECISION (ALNGAM-S, DLNGAM-D, CLNGAM-C) +C***KEYWORDS ABSOLUTE VALUE, COMPLETE GAMMA FUNCTION, FNLIB, LOGARITHM, +C SPECIAL FUNCTIONS +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C ALNGAM(X) computes the logarithm of the absolute value of the +C gamma function at X. +C +C***REFERENCES (NONE) +C***ROUTINES CALLED GAMMA, R1MACH, R9LGMC, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770601 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C 900326 Removed duplicate information from DESCRIPTION section. +C (WRB) +C 900727 Added EXTERNAL statement. 
(WRB) +C***END PROLOGUE ALNGAM + LOGICAL FIRST + EXTERNAL GAMMA + SAVE SQ2PIL, SQPI2L, PI, XMAX, DXREL, FIRST + DATA SQ2PIL / 0.9189385332 0467274E0/ + DATA SQPI2L / 0.2257913526 4472743E0/ + DATA PI / 3.1415926535 8979324E0/ + DATA FIRST /.TRUE./ +C***FIRST EXECUTABLE STATEMENT ALNGAM + IF (FIRST) THEN + XMAX = R1MACH(2)/LOG(R1MACH(2)) + DXREL = SQRT (R1MACH(4)) + ENDIF + FIRST = .FALSE. +C + Y = ABS(X) + IF (Y.GT.10.0) GO TO 20 +C +C LOG (ABS (GAMMA(X))) FOR ABS(X) .LE. 10.0 +C + ALNGAM = LOG (ABS (GAMMA(X))) + RETURN +C +C LOG (ABS (GAMMA(X))) FOR ABS(X) .GT. 10.0 +C + 20 IF (Y .GT. XMAX) CALL XERMSG ('SLATEC', 'ALNGAM', + + 'ABS(X) SO BIG ALNGAM OVERFLOWS', 2, 2) +C + IF (X.GT.0.) ALNGAM = SQ2PIL + (X-0.5)*LOG(X) - X + R9LGMC(Y) + IF (X.GT.0.) RETURN +C + SINPIY = ABS (SIN(PI*Y)) + IF (SINPIY .EQ. 0.) CALL XERMSG ('SLATEC', 'ALNGAM', + + 'X IS A NEGATIVE INTEGER', 3, 2) +C + IF (ABS((X-AINT(X-0.5))/X) .LT. DXREL) CALL XERMSG ('SLATEC', + + 'ALNGAM', 'ANSWER LT HALF PRECISION BECAUSE X TOO NEAR ' // + + 'NEGATIVE INTEGER', 1, 1) +C + ALNGAM = SQPI2L + (X-0.5)*LOG(Y) - X - LOG(SINPIY) - R9LGMC(Y) + RETURN +C + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/alnrel.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/alnrel.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,78 @@ +*DECK ALNREL + FUNCTION ALNREL (X) +C***BEGIN PROLOGUE ALNREL +C***PURPOSE Evaluate ln(1+X) accurate in the sense of relative error. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C4B +C***TYPE SINGLE PRECISION (ALNREL-S, DLNREL-D, CLNREL-C) +C***KEYWORDS ELEMENTARY FUNCTIONS, FNLIB, LOGARITHM +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C ALNREL(X) evaluates ln(1+X) accurately in the sense of relative +C error when X is very small. This routine must be used to +C maintain relative error accuracy whenever X is small and +C accurately known. 
+C +C Series for ALNR on the interval -3.75000D-01 to 3.75000D-01 +C with weighted error 1.93E-17 +C log weighted error 16.72 +C significant figures required 16.44 +C decimal places required 17.40 +C +C***REFERENCES (NONE) +C***ROUTINES CALLED CSEVL, INITS, R1MACH, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770401 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C 900326 Removed duplicate information from DESCRIPTION section. +C (WRB) +C***END PROLOGUE ALNREL + DIMENSION ALNRCS(23) + LOGICAL FIRST + SAVE ALNRCS, NLNREL, XMIN, FIRST + DATA ALNRCS( 1) / 1.0378693562 743770E0 / + DATA ALNRCS( 2) / -.1336430150 4908918E0 / + DATA ALNRCS( 3) / .0194082491 35520563E0 / + DATA ALNRCS( 4) / -.0030107551 12753577E0 / + DATA ALNRCS( 5) / .0004869461 47971548E0 / + DATA ALNRCS( 6) / -.0000810548 81893175E0 / + DATA ALNRCS( 7) / .0000137788 47799559E0 / + DATA ALNRCS( 8) / -.0000023802 21089435E0 / + DATA ALNRCS( 9) / .0000004164 04162138E0 / + DATA ALNRCS(10) / -.0000000735 95828378E0 / + DATA ALNRCS(11) / .0000000131 17611876E0 / + DATA ALNRCS(12) / -.0000000023 54670931E0 / + DATA ALNRCS(13) / .0000000004 25227732E0 / + DATA ALNRCS(14) / -.0000000000 77190894E0 / + DATA ALNRCS(15) / .0000000000 14075746E0 / + DATA ALNRCS(16) / -.0000000000 02576907E0 / + DATA ALNRCS(17) / .0000000000 00473424E0 / + DATA ALNRCS(18) / -.0000000000 00087249E0 / + DATA ALNRCS(19) / .0000000000 00016124E0 / + DATA ALNRCS(20) / -.0000000000 00002987E0 / + DATA ALNRCS(21) / .0000000000 00000554E0 / + DATA ALNRCS(22) / -.0000000000 00000103E0 / + DATA ALNRCS(23) / .0000000000 00000019E0 / + DATA FIRST /.TRUE./ +C***FIRST EXECUTABLE STATEMENT ALNREL + IF (FIRST) THEN + NLNREL = INITS (ALNRCS, 23, 0.1*R1MACH(3)) + XMIN = -1.0 + SQRT(R1MACH(4)) + ENDIF + FIRST = .FALSE. +C + IF (X .LE. 
(-1.0)) CALL XERMSG ('SLATEC', 'ALNREL', 'X IS LE -1', + + 2, 2) + IF (X .LT. XMIN) CALL XERMSG ('SLATEC', 'ALNREL', + + 'ANSWER LT HALF PRECISION BECAUSE X TOO NEAR -1', 1, 1) +C + IF (ABS(X).LE.0.375) ALNREL = X*(1. - + 1 X*CSEVL (X/.375, ALNRCS, NLNREL)) + IF (ABS(X).GT.0.375) ALNREL = LOG (1.0+X) +C + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/asinh.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/asinh.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,74 @@ +*DECK ASINH + FUNCTION ASINH (X) +C***BEGIN PROLOGUE ASINH +C***PURPOSE Compute the arc hyperbolic sine. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C4C +C***TYPE SINGLE PRECISION (ASINH-S, DASINH-D, CASINH-C) +C***KEYWORDS ARC HYPERBOLIC SINE, ASINH, ELEMENTARY FUNCTIONS, FNLIB, +C INVERSE HYPERBOLIC SINE +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C ASINH(X) computes the arc hyperbolic sine of X. +C +C Series for ASNH on the interval 0. to 1.00000D+00 +C with weighted error 2.19E-17 +C log weighted error 16.66 +C significant figures required 15.60 +C decimal places required 17.31 +C +C***REFERENCES (NONE) +C***ROUTINES CALLED CSEVL, INITS, R1MACH +C***REVISION HISTORY (YYMMDD) +C 770401 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. 
(BAB) +C***END PROLOGUE ASINH + DIMENSION ASNHCS(20) + LOGICAL FIRST + SAVE ALN2, ASNHCS, NTERMS, XMAX, SQEPS, FIRST + DATA ALN2 /0.6931471805 5994530942E0/ + DATA ASNHCS( 1) / -.1282003991 1738186E0 / + DATA ASNHCS( 2) / -.0588117611 89951768E0 / + DATA ASNHCS( 3) / .0047274654 32212481E0 / + DATA ASNHCS( 4) / -.0004938363 16265361E0 / + DATA ASNHCS( 5) / .0000585062 07058557E0 / + DATA ASNHCS( 6) / -.0000074669 98328931E0 / + DATA ASNHCS( 7) / .0000010011 69358355E0 / + DATA ASNHCS( 8) / -.0000001390 35438587E0 / + DATA ASNHCS( 9) / .0000000198 23169483E0 / + DATA ASNHCS(10) / -.0000000028 84746841E0 / + DATA ASNHCS(11) / .0000000004 26729654E0 / + DATA ASNHCS(12) / -.0000000000 63976084E0 / + DATA ASNHCS(13) / .0000000000 09699168E0 / + DATA ASNHCS(14) / -.0000000000 01484427E0 / + DATA ASNHCS(15) / .0000000000 00229037E0 / + DATA ASNHCS(16) / -.0000000000 00035588E0 / + DATA ASNHCS(17) / .0000000000 00005563E0 / + DATA ASNHCS(18) / -.0000000000 00000874E0 / + DATA ASNHCS(19) / .0000000000 00000138E0 / + DATA ASNHCS(20) / -.0000000000 00000021E0 / + DATA FIRST /.TRUE./ +C***FIRST EXECUTABLE STATEMENT ASINH + IF (FIRST) THEN + NTERMS = INITS (ASNHCS, 20, 0.1*R1MACH(3)) + SQEPS = SQRT (R1MACH(3)) + XMAX = 1.0/SQEPS + ENDIF + FIRST = .FALSE. +C + Y = ABS(X) + IF (Y.GT.1.0) GO TO 20 +C + ASINH = X + IF (Y.GT.SQEPS) ASINH = X*(1.0 + CSEVL (2.*X*X-1., ASNHCS,NTERMS)) + RETURN +C + 20 IF (Y.LT.XMAX) ASINH = LOG (Y + SQRT(Y**2+1.)) + IF (Y.GE.XMAX) ASINH = ALN2 + LOG(Y) + ASINH = SIGN (ASINH, X) +C + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/atanh.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/atanh.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,72 @@ +*DECK ATANH + FUNCTION ATANH (X) +C***BEGIN PROLOGUE ATANH +C***PURPOSE Compute the arc hyperbolic tangent. 
+C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C4C +C***TYPE SINGLE PRECISION (ATANH-S, DATANH-D, CATANH-C) +C***KEYWORDS ARC HYPERBOLIC TANGENT, ATANH, ELEMENTARY FUNCTIONS, +C FNLIB, INVERSE HYPERBOLIC TANGENT +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C ATANH(X) computes the arc hyperbolic tangent of X. +C +C Series for ATNH on the interval 0. to 2.50000D-01 +C with weighted error 6.70E-18 +C log weighted error 17.17 +C significant figures required 16.01 +C decimal places required 17.76 +C +C***REFERENCES (NONE) +C***ROUTINES CALLED CSEVL, INITS, R1MACH, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770401 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C 900326 Removed duplicate information from DESCRIPTION section. +C (WRB) +C***END PROLOGUE ATANH + DIMENSION ATNHCS(15) + LOGICAL FIRST + SAVE ATNHCS, NTERMS, DXREL, SQEPS, FIRST + DATA ATNHCS( 1) / .0943951023 93195492E0 / + DATA ATNHCS( 2) / .0491984370 55786159E0 / + DATA ATNHCS( 3) / .0021025935 22455432E0 / + DATA ATNHCS( 4) / .0001073554 44977611E0 / + DATA ATNHCS( 5) / .0000059782 67249293E0 / + DATA ATNHCS( 6) / .0000003505 06203088E0 / + DATA ATNHCS( 7) / .0000000212 63743437E0 / + DATA ATNHCS( 8) / .0000000013 21694535E0 / + DATA ATNHCS( 9) / .0000000000 83658755E0 / + DATA ATNHCS(10) / .0000000000 05370503E0 / + DATA ATNHCS(11) / .0000000000 00348665E0 / + DATA ATNHCS(12) / .0000000000 00022845E0 / + DATA ATNHCS(13) / .0000000000 00001508E0 / + DATA ATNHCS(14) / .0000000000 00000100E0 / + DATA ATNHCS(15) / .0000000000 00000006E0 / + DATA FIRST /.TRUE./ +C***FIRST EXECUTABLE STATEMENT ATANH + IF (FIRST) THEN + NTERMS = INITS (ATNHCS, 15, 0.1*R1MACH(3)) + DXREL = SQRT (R1MACH(4)) + SQEPS = SQRT (3.0*R1MACH(3)) + ENDIF + FIRST = .FALSE. +C + Y = ABS(X) + IF (Y .GE. 
1.0) CALL XERMSG ('SLATEC', 'ATANH', 'ABS(X) GE 1', 2, + + 2) +C + IF (1.0-Y .LT. DXREL) CALL XERMSG ('SLATEC', 'ATANH', + + 'ANSWER LT HALF PRECISION BECAUSE ABS(X) TOO NEAR 1', 1, 1) +C + ATANH = X + IF (Y.GT.SQEPS .AND. Y.LE.0.5) ATANH = X*(1.0 + CSEVL (8.*X*X-1., + 1 ATNHCS, NTERMS)) + IF (Y.GT.0.5) ATANH = 0.5*LOG((1.0+X)/(1.0-X)) +C + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/betai.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/betai.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,118 @@ +*DECK BETAI + REAL FUNCTION BETAI (X, PIN, QIN) +C***BEGIN PROLOGUE BETAI +C***PURPOSE Calculate the incomplete Beta function. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C7F +C***TYPE SINGLE PRECISION (BETAI-S, DBETAI-D) +C***KEYWORDS FNLIB, INCOMPLETE BETA FUNCTION, SPECIAL FUNCTIONS +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C BETAI calculates the REAL incomplete beta function. +C +C The incomplete beta function ratio is the probability that a +C random variable from a beta distribution having parameters PIN and +C QIN will be less than or equal to X. +C +C -- Input Arguments -- All arguments are REAL. +C X upper limit of integration. X must be in (0,1) inclusive. +C PIN first beta distribution parameter. PIN must be .GT. 0.0. +C QIN second beta distribution parameter. QIN must be .GT. 0.0. +C +C***REFERENCES Nancy E. Bosten and E. L. Battiste, Remark on Algorithm +C 179, Communications of the ACM 17, 3 (March 1974), +C pp. 156. +C***ROUTINES CALLED ALBETA, R1MACH, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770401 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C 900326 Removed duplicate information from DESCRIPTION section. +C (WRB) +C 920528 DESCRIPTION and REFERENCES sections revised. 
(WRB) +C***END PROLOGUE BETAI + LOGICAL FIRST + SAVE EPS, ALNEPS, SML, ALNSML, FIRST + DATA FIRST /.TRUE./ +C***FIRST EXECUTABLE STATEMENT BETAI + IF (FIRST) THEN + EPS = R1MACH(3) + ALNEPS = LOG(EPS) + SML = R1MACH(1) + ALNSML = LOG(SML) + ENDIF + FIRST = .FALSE. +C + IF (X .LT. 0. .OR. X .GT. 1.0) CALL XERMSG ('SLATEC', 'BETAI', + + 'X IS NOT IN THE RANGE (0,1)', 1, 2) + IF (PIN .LE. 0. .OR. QIN .LE. 0.) CALL XERMSG ('SLATEC', 'BETAI', + + 'P AND/OR Q IS LE ZERO', 2, 2) +C + Y = X + P = PIN + Q = QIN + IF (Q.LE.P .AND. X.LT.0.8) GO TO 20 + IF (X.LT.0.2) GO TO 20 + Y = 1.0 - Y + P = QIN + Q = PIN +C + 20 IF ((P+Q)*Y/(P+1.).LT.EPS) GO TO 80 +C +C EVALUATE THE INFINITE SUM FIRST. +C TERM WILL EQUAL Y**P/BETA(PS,P) * (1.-PS)I * Y**I / FAC(I) +C + PS = Q - AINT(Q) + IF (PS.EQ.0.) PS = 1.0 + XB = P*LOG(Y) - ALBETA(PS, P) - LOG(P) + BETAI = 0.0 + IF (XB.LT.ALNSML) GO TO 40 +C + BETAI = EXP (XB) + TERM = BETAI*P + IF (PS.EQ.1.0) GO TO 40 +C + N = MAX (ALNEPS/LOG(Y), 4.0E0) + DO 30 I=1,N + TERM = TERM*(I-PS)*Y/I + BETAI = BETAI + TERM/(P+I) + 30 CONTINUE +C +C NOW EVALUATE THE FINITE SUM, MAYBE. +C + 40 IF (Q.LE.1.0) GO TO 70 +C + XB = P*LOG(Y) + Q*LOG(1.0-Y) - ALBETA(P,Q) - LOG(Q) + IB = MAX (XB/ALNSML, 0.0E0) + TERM = EXP (XB - IB*ALNSML) + C = 1.0/(1.0-Y) + P1 = Q*C/(P+Q-1.) +C + FINSUM = 0.0 + N = Q + IF (Q.EQ.REAL(N)) N = N - 1 + DO 50 I=1,N + IF (P1.LE.1.0 .AND. TERM/EPS.LE.FINSUM) GO TO 60 + TERM = (Q-I+1)*C*TERM/(P+Q-I) +C + IF (TERM.GT.1.0) IB = IB - 1 + IF (TERM.GT.1.0) TERM = TERM*SML +C + IF (IB.EQ.0) FINSUM = FINSUM + TERM + 50 CONTINUE +C + 60 BETAI = BETAI + FINSUM + 70 IF (Y.NE.X .OR. P.NE.PIN) BETAI = 1.0 - BETAI + BETAI = MAX (MIN (BETAI, 1.0), 0.0) + RETURN +C + 80 BETAI = 0.0 + XB = P*LOG(MAX(Y,SML)) - LOG(P) - ALBETA(P,Q) + IF (XB.GT.ALNSML .AND. Y.NE.0.) BETAI = EXP (XB) + IF (Y.NE.X .OR. 
P.NE.PIN) BETAI = 1.0 - BETAI + RETURN +C + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/csevl.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/csevl.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,65 @@ +*DECK CSEVL + FUNCTION CSEVL (X, CS, N) +C***BEGIN PROLOGUE CSEVL +C***PURPOSE Evaluate a Chebyshev series. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C3A2 +C***TYPE SINGLE PRECISION (CSEVL-S, DCSEVL-D) +C***KEYWORDS CHEBYSHEV SERIES, FNLIB, SPECIAL FUNCTIONS +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C Evaluate the N-term Chebyshev series CS at X. Adapted from +C a method presented in the paper by Broucke referenced below. +C +C Input Arguments -- +C X value at which the series is to be evaluated. +C CS array of N terms of a Chebyshev series. In evaluating +C CS, only half the first coefficient is summed. +C N number of terms in array CS. +C +C***REFERENCES R. Broucke, Ten subroutines for the manipulation of +C Chebyshev series, Algorithm 446, Communications of +C the A.C.M. 16, (1973) pp. 254-256. +C L. Fox and I. B. Parker, Chebyshev Polynomials in +C Numerical Analysis, Oxford University Press, 1968, +C page 56. +C***ROUTINES CALLED R1MACH, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770401 DATE WRITTEN +C 890831 Modified array declarations. (WRB) +C 890831 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C 900329 Prologued revised extensively and code rewritten to allow +C X to be slightly outside interval (-1,+1). (WRB) +C 920501 Reformatted the REFERENCES section. (WRB) +C***END PROLOGUE CSEVL + REAL B0, B1, B2, CS(*), ONEPL, TWOX, X + LOGICAL FIRST + SAVE FIRST, ONEPL + DATA FIRST /.TRUE./ +C***FIRST EXECUTABLE STATEMENT CSEVL + IF (FIRST) ONEPL = 1.0E0 + R1MACH(4) + FIRST = .FALSE. + IF (N .LT. 1) CALL XERMSG ('SLATEC', 'CSEVL', + + 'NUMBER OF TERMS .LE. 0', 2, 2) + IF (N .GT. 
1000) CALL XERMSG ('SLATEC', 'CSEVL', + + 'NUMBER OF TERMS .GT. 1000', 3, 2) + IF (ABS(X) .GT. ONEPL) CALL XERMSG ('SLATEC', 'CSEVL', + + 'X OUTSIDE THE INTERVAL (-1,+1)', 1, 1) +C + B1 = 0.0E0 + B0 = 0.0E0 + TWOX = 2.0*X + DO 10 I = 1,N + B2 = B1 + B1 = B0 + NI = N + 1 - I + B0 = TWOX*B1 - B2 + CS(NI) + 10 CONTINUE +C + CSEVL = 0.5E0*(B0-B2) +C + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/erf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/erf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,73 @@ +*DECK ERF + FUNCTION ERF (X) +C***BEGIN PROLOGUE ERF +C***PURPOSE Compute the error function. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C8A, L5A1E +C***TYPE SINGLE PRECISION (ERF-S, DERF-D) +C***KEYWORDS ERF, ERROR FUNCTION, FNLIB, SPECIAL FUNCTIONS +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C ERF(X) calculates the single precision error function for +C single precision argument X. +C +C Series for ERF on the interval 0. to 1.00000D+00 +C with weighted error 7.10E-18 +C log weighted error 17.15 +C significant figures required 16.31 +C decimal places required 17.71 +C +C***REFERENCES (NONE) +C***ROUTINES CALLED CSEVL, ERFC, INITS, R1MACH +C***REVISION HISTORY (YYMMDD) +C 770401 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900727 Added EXTERNAL statement. (WRB) +C 920618 Removed space from variable name. 
(RWC, WRB) +C***END PROLOGUE ERF + DIMENSION ERFCS(13) + LOGICAL FIRST + EXTERNAL ERFC + SAVE ERFCS, SQRTPI, NTERF, XBIG, SQEPS, FIRST + DATA ERFCS( 1) / -.0490461212 34691808E0 / + DATA ERFCS( 2) / -.1422612051 0371364E0 / + DATA ERFCS( 3) / .0100355821 87599796E0 / + DATA ERFCS( 4) / -.0005768764 69976748E0 / + DATA ERFCS( 5) / .0000274199 31252196E0 / + DATA ERFCS( 6) / -.0000011043 17550734E0 / + DATA ERFCS( 7) / .0000000384 88755420E0 / + DATA ERFCS( 8) / -.0000000011 80858253E0 / + DATA ERFCS( 9) / .0000000000 32334215E0 / + DATA ERFCS(10) / -.0000000000 00799101E0 / + DATA ERFCS(11) / .0000000000 00017990E0 / + DATA ERFCS(12) / -.0000000000 00000371E0 / + DATA ERFCS(13) / .0000000000 00000007E0 / + DATA SQRTPI /1.772453850 9055160E0/ + DATA FIRST /.TRUE./ +C***FIRST EXECUTABLE STATEMENT ERF + IF (FIRST) THEN + NTERF = INITS (ERFCS, 13, 0.1*R1MACH(3)) + XBIG = SQRT(-LOG(SQRTPI*R1MACH(3))) + SQEPS = SQRT(2.0*R1MACH(3)) + ENDIF + FIRST = .FALSE. +C + Y = ABS(X) + IF (Y.GT.1.) GO TO 20 +C +C ERF(X) = 1. - ERFC(X) FOR -1. .LE. X .LE. 1. +C + IF (Y.LE.SQEPS) ERF = 2.0*X/SQRTPI + IF (Y.GT.SQEPS) ERF = X*(1.0 + CSEVL(2.*X**2-1., ERFCS, NTERF)) + RETURN +C +C ERF(X) = 1. - ERFC(X) FOR ABS(X) .GT. 1. +C + 20 IF (Y.LE.XBIG) ERF = SIGN (1.0-ERFC(Y), X) + IF (Y.GT.XBIG) ERF = SIGN (1.0, X) +C + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/erfc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/erfc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,156 @@ +*DECK ERFC + FUNCTION ERFC (X) +C***BEGIN PROLOGUE ERFC +C***PURPOSE Compute the complementary error function. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C8A, L5A1E +C***TYPE SINGLE PRECISION (ERFC-S, DERFC-D) +C***KEYWORDS COMPLEMENTARY ERROR FUNCTION, ERFC, FNLIB, +C SPECIAL FUNCTIONS +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C ERFC(X) calculates the single precision complementary error +C function for single precision argument X. 
+C +C Series for ERF on the interval 0. to 1.00000D+00 +C with weighted error 7.10E-18 +C log weighted error 17.15 +C significant figures required 16.31 +C decimal places required 17.71 +C +C Series for ERFC on the interval 0. to 2.50000D-01 +C with weighted error 4.81E-17 +C log weighted error 16.32 +C approx significant figures required 15.0 +C +C +C Series for ERC2 on the interval 2.50000D-01 to 1.00000D+00 +C with weighted error 5.22E-17 +C log weighted error 16.28 +C approx significant figures required 15.0 +C decimal places required 16.96 +C +C***REFERENCES (NONE) +C***ROUTINES CALLED CSEVL, INITS, R1MACH, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770701 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C 920618 Removed space from variable names. (RWC, WRB) +C***END PROLOGUE ERFC + DIMENSION ERFCS(13), ERFCCS(24), ERC2CS(23) + LOGICAL FIRST + SAVE ERFCS, ERC2CS, ERFCCS, SQRTPI, NTERF, NTERFC, + 1 NTERC2, XSML, XMAX, SQEPS, FIRST + DATA ERFCS( 1) / -.0490461212 34691808E0 / + DATA ERFCS( 2) / -.1422612051 0371364E0 / + DATA ERFCS( 3) / .0100355821 87599796E0 / + DATA ERFCS( 4) / -.0005768764 69976748E0 / + DATA ERFCS( 5) / .0000274199 31252196E0 / + DATA ERFCS( 6) / -.0000011043 17550734E0 / + DATA ERFCS( 7) / .0000000384 88755420E0 / + DATA ERFCS( 8) / -.0000000011 80858253E0 / + DATA ERFCS( 9) / .0000000000 32334215E0 / + DATA ERFCS(10) / -.0000000000 00799101E0 / + DATA ERFCS(11) / .0000000000 00017990E0 / + DATA ERFCS(12) / -.0000000000 00000371E0 / + DATA ERFCS(13) / .0000000000 00000007E0 / + DATA ERC2CS( 1) / -.0696013466 02309501E0 / + DATA ERC2CS( 2) / -.0411013393 62620893E0 / + DATA ERC2CS( 3) / .0039144958 66689626E0 / + DATA ERC2CS( 4) / -.0004906395 65054897E0 / + DATA ERC2CS( 5) / .0000715747 90013770E0 / + DATA ERC2CS( 6) / -.0000115307 16341312E0 / + DATA ERC2CS( 
7) / .0000019946 70590201E0 / + DATA ERC2CS( 8) / -.0000003642 66647159E0 / + DATA ERC2CS( 9) / .0000000694 43726100E0 / + DATA ERC2CS(10) / -.0000000137 12209021E0 / + DATA ERC2CS(11) / .0000000027 88389661E0 / + DATA ERC2CS(12) / -.0000000005 81416472E0 / + DATA ERC2CS(13) / .0000000001 23892049E0 / + DATA ERC2CS(14) / -.0000000000 26906391E0 / + DATA ERC2CS(15) / .0000000000 05942614E0 / + DATA ERC2CS(16) / -.0000000000 01332386E0 / + DATA ERC2CS(17) / .0000000000 00302804E0 / + DATA ERC2CS(18) / -.0000000000 00069666E0 / + DATA ERC2CS(19) / .0000000000 00016208E0 / + DATA ERC2CS(20) / -.0000000000 00003809E0 / + DATA ERC2CS(21) / .0000000000 00000904E0 / + DATA ERC2CS(22) / -.0000000000 00000216E0 / + DATA ERC2CS(23) / .0000000000 00000052E0 / + DATA ERFCCS( 1) / 0.0715179310 202925E0 / + DATA ERFCCS( 2) / -.0265324343 37606719E0 / + DATA ERFCCS( 3) / .0017111539 77920853E0 / + DATA ERFCCS( 4) / -.0001637516 63458512E0 / + DATA ERFCCS( 5) / .0000198712 93500549E0 / + DATA ERFCCS( 6) / -.0000028437 12412769E0 / + DATA ERFCCS( 7) / .0000004606 16130901E0 / + DATA ERFCCS( 8) / -.0000000822 77530261E0 / + DATA ERFCCS( 9) / .0000000159 21418724E0 / + DATA ERFCCS(10) / -.0000000032 95071356E0 / + DATA ERFCCS(11) / .0000000007 22343973E0 / + DATA ERFCCS(12) / -.0000000001 66485584E0 / + DATA ERFCCS(13) / .0000000000 40103931E0 / + DATA ERFCCS(14) / -.0000000000 10048164E0 / + DATA ERFCCS(15) / .0000000000 02608272E0 / + DATA ERFCCS(16) / -.0000000000 00699105E0 / + DATA ERFCCS(17) / .0000000000 00192946E0 / + DATA ERFCCS(18) / -.0000000000 00054704E0 / + DATA ERFCCS(19) / .0000000000 00015901E0 / + DATA ERFCCS(20) / -.0000000000 00004729E0 / + DATA ERFCCS(21) / .0000000000 00001432E0 / + DATA ERFCCS(22) / -.0000000000 00000439E0 / + DATA ERFCCS(23) / .0000000000 00000138E0 / + DATA ERFCCS(24) / -.0000000000 00000048E0 / + DATA SQRTPI /1.772453850 9055160E0/ + DATA FIRST /.TRUE./ +C***FIRST EXECUTABLE STATEMENT ERFC + IF (FIRST) THEN + ETA = 0.1*R1MACH(3) + NTERF = 
INITS (ERFCS, 13, ETA) + NTERFC = INITS (ERFCCS, 24, ETA) + NTERC2 = INITS (ERC2CS, 23, ETA) +C + XSML = -SQRT (-LOG(SQRTPI*R1MACH(3))) + TXMAX = SQRT (-LOG(SQRTPI*R1MACH(1))) + XMAX = TXMAX - 0.5*LOG(TXMAX)/TXMAX - 0.01 + SQEPS = SQRT (2.0*R1MACH(3)) + ENDIF + FIRST = .FALSE. +C + IF (X.GT.XSML) GO TO 20 +C +C ERFC(X) = 1.0 - ERF(X) FOR X .LT. XSML +C + ERFC = 2. + RETURN +C + 20 IF (X.GT.XMAX) GO TO 40 + Y = ABS(X) + IF (Y.GT.1.0) GO TO 30 +C +C ERFC(X) = 1.0 - ERF(X) FOR -1. .LE. X .LE. 1. +C + IF (Y.LT.SQEPS) ERFC = 1.0 - 2.0*X/SQRTPI + IF (Y.GE.SQEPS) ERFC = 1.0 - + 1 X*(1.0 + CSEVL (2.*X*X-1., ERFCS, NTERF) ) + RETURN +C +C ERFC(X) = 1.0 - ERF(X) FOR 1. .LT. ABS(X) .LE. XMAX +C + 30 Y = Y*Y + IF (Y.LE.4.) ERFC = EXP(-Y)/ABS(X) * (0.5 + CSEVL ((8./Y-5.)/3., + 1 ERC2CS, NTERC2) ) + IF (Y.GT.4.) ERFC = EXP(-Y)/ABS(X) * (0.5 + CSEVL (8./Y-1., + 1 ERFCCS, NTERFC) ) + IF (X.LT.0.) ERFC = 2.0 - ERFC + RETURN +C + 40 CALL XERMSG ('SLATEC', 'ERFC', 'X SO BIG ERFC UNDERFLOWS', 1, 1) + ERFC = 0. + RETURN +C + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/gami.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/gami.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,45 @@ +*DECK GAMI + FUNCTION GAMI (A, X) +C***BEGIN PROLOGUE GAMI +C***PURPOSE Evaluate the incomplete Gamma function. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C7E +C***TYPE SINGLE PRECISION (GAMI-S, DGAMI-D) +C***KEYWORDS FNLIB, INCOMPLETE GAMMA FUNCTION, SPECIAL FUNCTIONS +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C Evaluate the incomplete gamma function defined by +C +C GAMI = integral from T = 0 to X of EXP(-T) * T**(A-1.0) . +C +C GAMI is evaluated for positive values of A and non-negative values +C of X. A slight deterioration of 2 or 3 digits accuracy will occur +C when GAMI is very large or very small, because logarithmic variables +C are used. GAMI, A, and X are single precision. 
+C +C***REFERENCES (NONE) +C***ROUTINES CALLED ALNGAM, GAMIT, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770701 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C***END PROLOGUE GAMI +C***FIRST EXECUTABLE STATEMENT GAMI + IF (A .LE. 0.0) CALL XERMSG ('SLATEC', 'GAMI', + + 'A MUST BE GT ZERO', 1, 2) + IF (X .LT. 0.0) CALL XERMSG ('SLATEC', 'GAMI', + + 'X MUST BE GE ZERO', 2, 2) +C + GAMI = 0.0 + IF (X.EQ.0.0) RETURN +C +C THE ONLY ERROR POSSIBLE IN THE EXPRESSION BELOW IS A FATAL OVERFLOW. + FACTOR = EXP (ALNGAM(A) + A*LOG(X) ) +C + GAMI = FACTOR * GAMIT(A, X) +C + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/gamit.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/gamit.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,112 @@ +*DECK GAMIT + REAL FUNCTION GAMIT (A, X) +C***BEGIN PROLOGUE GAMIT +C***PURPOSE Calculate Tricomi's form of the incomplete Gamma function. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C7E +C***TYPE SINGLE PRECISION (GAMIT-S, DGAMIT-D) +C***KEYWORDS COMPLEMENTARY INCOMPLETE GAMMA FUNCTION, FNLIB, +C SPECIAL FUNCTIONS, TRICOMI +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C Evaluate Tricomi's incomplete gamma function defined by +C +C GAMIT = X**(-A)/GAMMA(A) * integral from 0 to X of EXP(-T) * +C T**(A-1.) +C +C for A .GT. 0.0 and by analytic continuation for A .LE. 0.0. +C GAMMA(X) is the complete gamma function of X. +C +C GAMIT is evaluated for arbitrary real values of A and for non- +C negative values of X (even though GAMIT is defined for X .LT. +C 0.0), except that for X = 0 and A .LE. 0.0, GAMIT is infinite, +C which is a fatal error. +C +C The function and both arguments are REAL. 
+C +C A slight deterioration of 2 or 3 digits accuracy will occur when +C GAMIT is very large or very small in absolute value, because log- +C arithmic variables are used. Also, if the parameter A is very +C close to a negative integer (but not a negative integer), there is +C a loss of accuracy, which is reported if the result is less than +C half machine precision. +C +C***REFERENCES W. Gautschi, A computational procedure for incomplete +C gamma functions, ACM Transactions on Mathematical +C Software 5, 4 (December 1979), pp. 466-481. +C W. Gautschi, Incomplete gamma functions, Algorithm 542, +C ACM Transactions on Mathematical Software 5, 4 +C (December 1979), pp. 482-489. +C***ROUTINES CALLED ALGAMS, ALNGAM, GAMR, R1MACH, R9GMIT, R9LGIC, +C R9LGIT, XERCLR, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770701 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C 920528 DESCRIPTION and REFERENCES sections revised. (WRB) +C***END PROLOGUE GAMIT + LOGICAL FIRST + SAVE ALNEPS, SQEPS, BOT, FIRST + DATA FIRST /.TRUE./ +C***FIRST EXECUTABLE STATEMENT GAMIT + IF (FIRST) THEN + ALNEPS = -LOG(R1MACH(3)) + SQEPS = SQRT(R1MACH(4)) + BOT = LOG(R1MACH(1)) + ENDIF + FIRST = .FALSE. +C + IF (X .LT. 0.0) CALL XERMSG ('SLATEC', 'GAMIT', 'X IS NEGATIVE', + + 2, 2) +C + IF (X.NE.0.0) ALX = LOG(X) + SGA = 1.0 + IF (A.NE.0.0) SGA = SIGN (1.0, A) + AINTA = AINT (A+0.5*SGA) + AEPS = A - AINTA +C + IF (X.GT.0.0) GO TO 20 + GAMIT = 0.0 + IF (AINTA.GT.0.0 .OR. AEPS.NE.0.0) GAMIT = GAMR(A+1.0) + RETURN +C + 20 IF (X.GT.1.0) GO TO 40 + IF (A.GE.(-0.5) .OR. 
AEPS.NE.0.0) CALL ALGAMS (A+1.0, ALGAP1, + 1 SGNGAM) + GAMIT = R9GMIT (A, X, ALGAP1, SGNGAM, ALX) + RETURN +C + 40 IF (A.LT.X) GO TO 50 + T = R9LGIT (A, X, ALNGAM(A+1.0)) + IF (T.LT.BOT) CALL XERCLR + GAMIT = EXP(T) + RETURN +C + 50 ALNG = R9LGIC (A, X, ALX) +C +C EVALUATE GAMIT IN TERMS OF LOG(GAMIC(A,X)) +C + H = 1.0 + IF (AEPS.EQ.0.0 .AND. AINTA.LE.0.0) GO TO 60 + CALL ALGAMS (A+1.0, ALGAP1, SGNGAM) + T = LOG(ABS(A)) + ALNG - ALGAP1 + IF (T.GT.ALNEPS) GO TO 70 + IF (T.GT.(-ALNEPS)) H = 1.0 - SGA*SGNGAM*EXP(T) + IF (ABS(H).GT.SQEPS) GO TO 60 + CALL XERCLR + CALL XERMSG ('SLATEC', 'GAMIT', 'RESULT LT HALF PRECISION', 1, 1) +C + 60 T = -A*ALX + LOG(ABS(H)) + IF (T.LT.BOT) CALL XERCLR + GAMIT = SIGN (EXP(T), H) + RETURN +C + 70 T = T - A*ALX + IF (T.LT.BOT) CALL XERCLR + GAMIT = -SGA*SGNGAM*EXP(T) + RETURN +C + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/gamlim.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/gamlim.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,61 @@ +*DECK GAMLIM + SUBROUTINE GAMLIM (XMIN, XMAX) +C***BEGIN PROLOGUE GAMLIM +C***PURPOSE Compute the minimum and maximum bounds for the argument in +C the Gamma function. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C7A, R2 +C***TYPE SINGLE PRECISION (GAMLIM-S, DGAMLM-D) +C***KEYWORDS COMPLETE GAMMA FUNCTION, FNLIB, LIMITS, SPECIAL FUNCTIONS +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C Calculate the minimum and maximum legal bounds for X in GAMMA(X). +C XMIN and XMAX are not the only bounds, but they are the only non- +C trivial ones to calculate. +C +C Output Arguments -- +C XMIN minimum legal value of X in GAMMA(X). Any smaller value of +C X might result in underflow. +C XMAX maximum legal value of X in GAMMA(X). Any larger value will +C cause overflow. +C +C***REFERENCES (NONE) +C***ROUTINES CALLED R1MACH, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770401 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. 
(WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C***END PROLOGUE GAMLIM +C***FIRST EXECUTABLE STATEMENT GAMLIM + ALNSML = LOG(R1MACH(1)) + XMIN = -ALNSML + DO 10 I=1,10 + XOLD = XMIN + XLN = LOG(XMIN) + XMIN = XMIN - XMIN*((XMIN+0.5)*XLN - XMIN - 0.2258 + ALNSML) + 1 / (XMIN*XLN + 0.5) + IF (ABS(XMIN-XOLD).LT.0.005) GO TO 20 + 10 CONTINUE + CALL XERMSG ('SLATEC', 'GAMLIM', 'UNABLE TO FIND XMIN', 1, 2) +C + 20 XMIN = -XMIN + 0.01 +C + ALNBIG = LOG(R1MACH(2)) + XMAX = ALNBIG + DO 30 I=1,10 + XOLD = XMAX + XLN = LOG(XMAX) + XMAX = XMAX - XMAX*((XMAX-0.5)*XLN - XMAX + 0.9189 - ALNBIG) + 1 / (XMAX*XLN - 0.5) + IF (ABS(XMAX-XOLD).LT.0.005) GO TO 40 + 30 CONTINUE + CALL XERMSG ('SLATEC', 'GAMLIM', 'UNABLE TO FIND XMAX', 2, 2) +C + 40 XMAX = XMAX - 0.01 + XMIN = MAX (XMIN, -XMAX+1.) +C + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/gamma.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/gamma.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,138 @@ +*DECK GAMMA + FUNCTION GAMMA (X) +C***BEGIN PROLOGUE GAMMA +C***PURPOSE Compute the complete Gamma function. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C7A +C***TYPE SINGLE PRECISION (GAMMA-S, DGAMMA-D, CGAMMA-C) +C***KEYWORDS COMPLETE GAMMA FUNCTION, FNLIB, SPECIAL FUNCTIONS +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C GAMMA computes the gamma function at X, where X is not 0, -1, -2, .... +C GAMMA and X are single precision. +C +C***REFERENCES (NONE) +C***ROUTINES CALLED CSEVL, GAMLIM, INITS, R1MACH, R9LGMC, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770601 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. 
(THJ) +C***END PROLOGUE GAMMA + DIMENSION GCS(23) + LOGICAL FIRST + SAVE GCS, PI, SQ2PIL, NGCS, XMIN, XMAX, DXREL, FIRST + DATA GCS ( 1) / .0085711955 90989331E0/ + DATA GCS ( 2) / .0044153813 24841007E0/ + DATA GCS ( 3) / .0568504368 1599363E0/ + DATA GCS ( 4) /-.0042198353 96418561E0/ + DATA GCS ( 5) / .0013268081 81212460E0/ + DATA GCS ( 6) /-.0001893024 529798880E0/ + DATA GCS ( 7) / .0000360692 532744124E0/ + DATA GCS ( 8) /-.0000060567 619044608E0/ + DATA GCS ( 9) / .0000010558 295463022E0/ + DATA GCS (10) /-.0000001811 967365542E0/ + DATA GCS (11) / .0000000311 772496471E0/ + DATA GCS (12) /-.0000000053 542196390E0/ + DATA GCS (13) / .0000000009 193275519E0/ + DATA GCS (14) /-.0000000001 577941280E0/ + DATA GCS (15) / .0000000000 270798062E0/ + DATA GCS (16) /-.0000000000 046468186E0/ + DATA GCS (17) / .0000000000 007973350E0/ + DATA GCS (18) /-.0000000000 001368078E0/ + DATA GCS (19) / .0000000000 000234731E0/ + DATA GCS (20) /-.0000000000 000040274E0/ + DATA GCS (21) / .0000000000 000006910E0/ + DATA GCS (22) /-.0000000000 000001185E0/ + DATA GCS (23) / .0000000000 000000203E0/ + DATA PI /3.14159 26535 89793 24E0/ +C SQ2PIL IS LOG (SQRT (2.*PI) ) + DATA SQ2PIL /0.91893 85332 04672 74E0/ + DATA FIRST /.TRUE./ +C +C LANL DEPENDENT CODE REMOVED 81.02.04 +C +C***FIRST EXECUTABLE STATEMENT GAMMA + IF (FIRST) THEN +C +C --------------------------------------------------------------------- +C INITIALIZE. FIND LEGAL BOUNDS FOR X, AND DETERMINE THE NUMBER OF +C TERMS IN THE SERIES REQUIRED TO ATTAIN AN ACCURACY TEN TIMES BETTER +C THAN MACHINE PRECISION. +C + NGCS = INITS (GCS, 23, 0.1*R1MACH(3)) +C + CALL GAMLIM (XMIN, XMAX) + DXREL = SQRT (R1MACH(4)) +C +C --------------------------------------------------------------------- +C FINISH INITIALIZATION. START EVALUATING GAMMA(X). +C + ENDIF + FIRST = .FALSE. +C + Y = ABS(X) + IF (Y.GT.10.0) GO TO 50 +C +C COMPUTE GAMMA(X) FOR ABS(X) .LE. 10.0. REDUCE INTERVAL AND +C FIND GAMMA(1+Y) FOR 0. .LE. Y .LT. 1. 
FIRST OF ALL. +C + N = X + IF (X.LT.0.) N = N - 1 + Y = X - N + N = N - 1 + GAMMA = 0.9375 + CSEVL(2.*Y-1., GCS, NGCS) + IF (N.EQ.0) RETURN +C + IF (N.GT.0) GO TO 30 +C +C COMPUTE GAMMA(X) FOR X .LT. 1. +C + N = -N + IF (X .EQ. 0.) CALL XERMSG ('SLATEC', 'GAMMA', 'X IS 0', 4, 2) + IF (X .LT. 0. .AND. X+N-2 .EQ. 0.) CALL XERMSG ('SLATEC', 'GAMMA' + 1, 'X IS A NEGATIVE INTEGER', 4, 2) + IF (X .LT. (-0.5) .AND. ABS((X-AINT(X-0.5))/X) .LT. DXREL) CALL + 1XERMSG ( 'SLATEC', 'GAMMA', + 2'ANSWER LT HALF PRECISION BECAUSE X TOO NEAR NEGATIVE INTEGER' + 3, 1, 1) +C + DO 20 I=1,N + GAMMA = GAMMA / (X+I-1) + 20 CONTINUE + RETURN +C +C GAMMA(X) FOR X .GE. 2. +C + 30 DO 40 I=1,N + GAMMA = (Y+I)*GAMMA + 40 CONTINUE + RETURN +C +C COMPUTE GAMMA(X) FOR ABS(X) .GT. 10.0. RECALL Y = ABS(X). +C + 50 IF (X .GT. XMAX) CALL XERMSG ('SLATEC', 'GAMMA', + + 'X SO BIG GAMMA OVERFLOWS', 3, 2) +C + GAMMA = 0. + IF (X .LT. XMIN) CALL XERMSG ('SLATEC', 'GAMMA', + + 'X SO SMALL GAMMA UNDERFLOWS', 2, 1) + IF (X.LT.XMIN) RETURN +C + GAMMA = EXP((Y-0.5)*LOG(Y) - Y + SQ2PIL + R9LGMC(Y) ) + IF (X.GT.0.) RETURN +C + IF (ABS((X-AINT(X-0.5))/X) .LT. DXREL) CALL XERMSG ('SLATEC', + + 'GAMMA', + + 'ANSWER LT HALF PRECISION, X TOO NEAR NEGATIVE INTEGER', 1, 1) +C + SINPIY = SIN (PI*Y) + IF (SINPIY .EQ. 0.) CALL XERMSG ('SLATEC', 'GAMMA', + + 'X IS A NEGATIVE INTEGER', 4, 2) +C + GAMMA = -PI / (Y*SINPIY*GAMMA) +C + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/gamr.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/gamr.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,42 @@ +*DECK GAMR + FUNCTION GAMR (X) +C***BEGIN PROLOGUE GAMR +C***PURPOSE Compute the reciprocal of the Gamma function. 
+C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C7A +C***TYPE SINGLE PRECISION (GAMR-S, DGAMR-D, CGAMR-C) +C***KEYWORDS FNLIB, RECIPROCAL GAMMA FUNCTION, SPECIAL FUNCTIONS +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C GAMR is a single precision function that evaluates the reciprocal +C of the gamma function for single precision argument X. +C +C***REFERENCES (NONE) +C***ROUTINES CALLED ALGAMS, GAMMA, XERCLR, XGETF, XSETF +C***REVISION HISTORY (YYMMDD) +C 770701 DATE WRITTEN +C 861211 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900727 Added EXTERNAL statement. (WRB) +C***END PROLOGUE GAMR + EXTERNAL GAMMA +C***FIRST EXECUTABLE STATEMENT GAMR + GAMR = 0.0 + IF (X.LE.0.0 .AND. AINT(X).EQ.X) RETURN +C + CALL XGETF (IROLD) + CALL XSETF (1) + IF (ABS(X).GT.10.0) GO TO 10 + GAMR = 1.0/GAMMA(X) + CALL XERCLR + CALL XSETF (IROLD) + RETURN +C + 10 CALL ALGAMS (X, ALNGX, SGNGX) + CALL XERCLR + CALL XSETF (IROLD) + GAMR = SGNGX * EXP(-ALNGX) + RETURN +C + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/inits.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/inits.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,53 @@ +*DECK INITS + FUNCTION INITS (OS, NOS, ETA) +C***BEGIN PROLOGUE INITS +C***PURPOSE Determine the number of terms needed in an orthogonal +C polynomial series so that it meets a specified accuracy. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C3A2 +C***TYPE SINGLE PRECISION (INITS-S, INITDS-D) +C***KEYWORDS CHEBYSHEV, FNLIB, INITIALIZE, ORTHOGONAL POLYNOMIAL, +C ORTHOGONAL SERIES, SPECIAL FUNCTIONS +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C Initialize the orthogonal series, represented by the array OS, so +C that INITS is the number of terms needed to insure the error is no +C larger than ETA. Ordinarily, ETA will be chosen to be one-tenth +C machine precision. +C +C Input Arguments -- +C OS single precision array of NOS coefficients in an orthogonal +C series. 
+C NOS number of coefficients in OS. +C ETA single precision scalar containing requested accuracy of +C series. +C +C***REFERENCES (NONE) +C***ROUTINES CALLED XERMSG +C***REVISION HISTORY (YYMMDD) +C 770401 DATE WRITTEN +C 890831 Modified array declarations. (WRB) +C 891115 Modified error message. (WRB) +C 891115 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C***END PROLOGUE INITS + REAL OS(*) +C***FIRST EXECUTABLE STATEMENT INITS + IF (NOS .LT. 1) CALL XERMSG ('SLATEC', 'INITS', + + 'Number of coefficients is less than 1', 2, 1) +C + ERR = 0. + DO 10 II = 1,NOS + I = NOS + 1 - II + ERR = ERR + ABS(OS(I)) + IF (ERR.GT.ETA) GO TO 20 + 10 CONTINUE +C + 20 IF (I .EQ. NOS) CALL XERMSG ('SLATEC', 'INITS', + + 'Chebyshev series too short for specified accuracy', 1, 1) + INITS = I +C + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/pchim.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/pchim.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,280 @@ +*DECK PCHIM + SUBROUTINE PCHIM (N, X, F, D, INCFD, IERR) +C***BEGIN PROLOGUE PCHIM +C***PURPOSE Set derivatives needed to determine a monotone piecewise +C cubic Hermite interpolant to given data. Boundary values +C are provided which are compatible with monotonicity. The +C interpolant will have an extremum at each point where mono- +C tonicity switches direction. (See PCHIC if user control is +C desired over boundary or switch conditions.) +C***LIBRARY SLATEC (PCHIP) +C***CATEGORY E1A +C***TYPE SINGLE PRECISION (PCHIM-S, DPCHIM-D) +C***KEYWORDS CUBIC HERMITE INTERPOLATION, MONOTONE INTERPOLATION, +C PCHIP, PIECEWISE CUBIC INTERPOLATION +C***AUTHOR Fritsch, F. N., (LLNL) +C Lawrence Livermore National Laboratory +C P.O. Box 808 (L-316) +C Livermore, CA 94550 +C FTS 532-4275, (510) 422-4275 +C***DESCRIPTION +C +C PCHIM: Piecewise Cubic Hermite Interpolation to +C Monotone data. 
+C +C Sets derivatives needed to determine a monotone piecewise cubic +C Hermite interpolant to the data given in X and F. +C +C Default boundary conditions are provided which are compatible +C with monotonicity. (See PCHIC if user control of boundary con- +C ditions is desired.) +C +C If the data are only piecewise monotonic, the interpolant will +C have an extremum at each point where monotonicity switches direc- +C tion. (See PCHIC if user control is desired in such cases.) +C +C To facilitate two-dimensional applications, includes an increment +C between successive values of the F- and D-arrays. +C +C The resulting piecewise cubic Hermite function may be evaluated +C by PCHFE or PCHFD. +C +C ---------------------------------------------------------------------- +C +C Calling sequence: +C +C PARAMETER (INCFD = ...) +C INTEGER N, IERR +C REAL X(N), F(INCFD,N), D(INCFD,N) +C +C CALL PCHIM (N, X, F, D, INCFD, IERR) +C +C Parameters: +C +C N -- (input) number of data points. (Error return if N.LT.2 .) +C If N=2, simply does linear interpolation. +C +C X -- (input) real array of independent variable values. The +C elements of X must be strictly increasing: +C X(I-1) .LT. X(I), I = 2(1)N. +C (Error return if not.) +C +C F -- (input) real array of dependent variable values to be inter- +C polated. F(1+(I-1)*INCFD) is value corresponding to X(I). +C PCHIM is designed for monotonic data, but it will work for +C any F-array. It will force extrema at points where mono- +C tonicity switches direction. If some other treatment of +C switch points is desired, PCHIC should be used instead. +C ----- +C D -- (output) real array of derivative values at the data points. +C If the data are monotonic, these values will determine a +C a monotone cubic Hermite function. +C The value corresponding to X(I) is stored in +C D(1+(I-1)*INCFD), I=1(1)N. +C No other entries in D are changed. +C +C INCFD -- (input) increment between successive values in F and D. 
+C This argument is provided primarily for 2-D applications. +C (Error return if INCFD.LT.1 .) +C +C IERR -- (output) error flag. +C Normal return: +C IERR = 0 (no errors). +C Warning error: +C IERR.GT.0 means that IERR switches in the direction +C of monotonicity were detected. +C "Recoverable" errors: +C IERR = -1 if N.LT.2 . +C IERR = -2 if INCFD.LT.1 . +C IERR = -3 if the X-array is not strictly increasing. +C (The D-array has not been changed in any of these cases.) +C NOTE: The above errors are checked in the order listed, +C and following arguments have **NOT** been validated. +C +C***REFERENCES 1. F. N. Fritsch and J. Butland, A method for construc- +C ting local monotone piecewise cubic interpolants, SIAM +C Journal on Scientific and Statistical Computing 5, 2 +C (June 1984), pp. 300-304. +C 2. F. N. Fritsch and R. E. Carlson, Monotone piecewise +C cubic interpolation, SIAM Journal on Numerical Ana- +C lysis 17, 2 (April 1980), pp. 238-246. +C***ROUTINES CALLED PCHST, XERMSG +C***REVISION HISTORY (YYMMDD) +C 811103 DATE WRITTEN +C 820201 1. Introduced PCHST to reduce possible over/under- +C flow problems. +C 2. Rearranged derivative formula for same reason. +C 820602 1. Modified end conditions to be continuous functions +C of data when monotonicity switches in next interval. +C 2. Modified formulas so end conditions are less prone +C of over/underflow problems. +C 820803 Minor cosmetic changes for release 1. +C 870813 Updated Reference 1. +C 890411 Added SAVE statements (Vers. 3.2). +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890703 Corrected category record. (WRB) +C 890831 Modified array declarations. (WRB) +C 890831 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C 920429 Revised format and order of references. (WRB,FNF) +C***END PROLOGUE PCHIM +C Programming notes: +C +C 1. 
The function PCHST(ARG1,ARG2) is assumed to return zero if +C either argument is zero, +1 if they are of the same sign, and +C -1 if they are of opposite sign. +C 2. To produce a double precision version, simply: +C a. Change PCHIM to DPCHIM wherever it occurs, +C b. Change PCHST to DPCHST wherever it occurs, +C c. Change all references to the Fortran intrinsics to their +C double precision equivalents, +C d. Change the real declarations to double precision, and +C e. Change the constants ZERO and THREE to double precision. +C +C DECLARE ARGUMENTS. +C + INTEGER N, INCFD, IERR + REAL X(*), F(INCFD,*), D(INCFD,*) +C +C DECLARE LOCAL VARIABLES. +C + INTEGER I, NLESS1 + REAL DEL1, DEL2, DMAX, DMIN, DRAT1, DRAT2, DSAVE, + * H1, H2, HSUM, HSUMT3, THREE, W1, W2, ZERO + SAVE ZERO, THREE + REAL PCHST + DATA ZERO /0./, THREE /3./ +C +C VALIDITY-CHECK ARGUMENTS. +C +C***FIRST EXECUTABLE STATEMENT PCHIM + IF ( N.LT.2 ) GO TO 5001 + IF ( INCFD.LT.1 ) GO TO 5002 + DO 1 I = 2, N + IF ( X(I).LE.X(I-1) ) GO TO 5003 + 1 CONTINUE +C +C FUNCTION DEFINITION IS OK, GO ON. +C + IERR = 0 + NLESS1 = N - 1 + H1 = X(2) - X(1) + DEL1 = (F(1,2) - F(1,1))/H1 + DSAVE = DEL1 +C +C SPECIAL CASE N=2 -- USE LINEAR INTERPOLATION. +C + IF (NLESS1 .GT. 1) GO TO 10 + D(1,1) = DEL1 + D(1,N) = DEL1 + GO TO 5000 +C +C NORMAL CASE (N .GE. 3). +C + 10 CONTINUE + H2 = X(3) - X(2) + DEL2 = (F(1,3) - F(1,2))/H2 +C +C SET D(1) VIA NON-CENTERED THREE-POINT FORMULA, ADJUSTED TO BE +C SHAPE-PRESERVING. +C + HSUM = H1 + H2 + W1 = (H1 + HSUM)/HSUM + W2 = -H1/HSUM + D(1,1) = W1*DEL1 + W2*DEL2 + IF ( PCHST(D(1,1),DEL1) .LE. ZERO) THEN + D(1,1) = ZERO + ELSE IF ( PCHST(DEL1,DEL2) .LT. ZERO) THEN +C NEED DO THIS CHECK ONLY IF MONOTONICITY SWITCHES. + DMAX = THREE*DEL1 + IF (ABS(D(1,1)) .GT. ABS(DMAX)) D(1,1) = DMAX + ENDIF +C +C LOOP THROUGH INTERIOR POINTS. +C + DO 50 I = 2, NLESS1 + IF (I .EQ. 
2) GO TO 40 +C + H1 = H2 + H2 = X(I+1) - X(I) + HSUM = H1 + H2 + DEL1 = DEL2 + DEL2 = (F(1,I+1) - F(1,I))/H2 + 40 CONTINUE +C +C SET D(I)=0 UNLESS DATA ARE STRICTLY MONOTONIC. +C + D(1,I) = ZERO + IF ( PCHST(DEL1,DEL2) ) 42, 41, 45 +C +C COUNT NUMBER OF CHANGES IN DIRECTION OF MONOTONICITY. +C + 41 CONTINUE + IF (DEL2 .EQ. ZERO) GO TO 50 + IF ( PCHST(DSAVE,DEL2) .LT. ZERO) IERR = IERR + 1 + DSAVE = DEL2 + GO TO 50 +C + 42 CONTINUE + IERR = IERR + 1 + DSAVE = DEL2 + GO TO 50 +C +C USE BRODLIE MODIFICATION OF BUTLAND FORMULA. +C + 45 CONTINUE + HSUMT3 = HSUM+HSUM+HSUM + W1 = (HSUM + H1)/HSUMT3 + W2 = (HSUM + H2)/HSUMT3 + DMAX = MAX( ABS(DEL1), ABS(DEL2) ) + DMIN = MIN( ABS(DEL1), ABS(DEL2) ) + DRAT1 = DEL1/DMAX + DRAT2 = DEL2/DMAX + D(1,I) = DMIN/(W1*DRAT1 + W2*DRAT2) +C + 50 CONTINUE +C +C SET D(N) VIA NON-CENTERED THREE-POINT FORMULA, ADJUSTED TO BE +C SHAPE-PRESERVING. +C + W1 = -H2/HSUM + W2 = (H2 + HSUM)/HSUM + D(1,N) = W1*DEL1 + W2*DEL2 + IF ( PCHST(D(1,N),DEL2) .LE. ZERO) THEN + D(1,N) = ZERO + ELSE IF ( PCHST(DEL1,DEL2) .LT. ZERO) THEN +C NEED DO THIS CHECK ONLY IF MONOTONICITY SWITCHES. + DMAX = THREE*DEL2 + IF (ABS(D(1,N)) .GT. ABS(DMAX)) D(1,N) = DMAX + ENDIF +C +C NORMAL RETURN. +C + 5000 CONTINUE + RETURN +C +C ERROR RETURNS. +C + 5001 CONTINUE +C N.LT.2 RETURN. + IERR = -1 + CALL XERMSG ('SLATEC', 'PCHIM', + + 'NUMBER OF DATA POINTS LESS THAN TWO', IERR, 1) + RETURN +C + 5002 CONTINUE +C INCFD.LT.1 RETURN. + IERR = -2 + CALL XERMSG ('SLATEC', 'PCHIM', 'INCREMENT LESS THAN ONE', IERR, + + 1) + RETURN +C + 5003 CONTINUE +C X-ARRAY NOT STRICTLY INCREASING. 
+ IERR = -3 + CALL XERMSG ('SLATEC', 'PCHIM', 'X-ARRAY NOT STRICTLY INCREASING' + + , IERR, 1) + RETURN +C------------- LAST LINE OF PCHIM FOLLOWS ------------------------------ + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/pchst.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/pchst.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,57 @@ +*DECK PCHST + REAL FUNCTION PCHST (ARG1, ARG2) +C***BEGIN PROLOGUE PCHST +C***SUBSIDIARY +C***PURPOSE PCHIP Sign-Testing Routine +C***LIBRARY SLATEC (PCHIP) +C***TYPE SINGLE PRECISION (PCHST-S, DPCHST-D) +C***AUTHOR Fritsch, F. N., (LLNL) +C***DESCRIPTION +C +C PCHST: PCHIP Sign-Testing Routine. +C +C Returns: +C -1. if ARG1 and ARG2 are of opposite sign. +C 0. if either argument is zero. +C +1. if ARG1 and ARG2 are of the same sign. +C +C The object is to do this without multiplying ARG1*ARG2, to avoid +C possible over/underflow problems. +C +C Fortran intrinsics used: SIGN. +C +C***SEE ALSO PCHCE, PCHCI, PCHCS, PCHIM +C***ROUTINES CALLED (NONE) +C***REVISION HISTORY (YYMMDD) +C 811103 DATE WRITTEN +C 820805 Converted to SLATEC library version. +C 870813 Minor cosmetic changes. +C 890411 Added SAVE statements (Vers. 3.2). +C 890411 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900328 Added TYPE section. (WRB) +C 910408 Updated AUTHOR and DATE WRITTEN sections in prologue. (WRB) +C 930503 Improved purpose. (FNF) +C***END PROLOGUE PCHST +C +C**End +C +C DECLARE ARGUMENTS. +C + REAL ARG1, ARG2 +C +C DECLARE LOCAL VARIABLES. +C + REAL ONE, ZERO + SAVE ZERO, ONE + DATA ZERO /0./, ONE /1./ +C +C PERFORM THE TEST. +C +C***FIRST EXECUTABLE STATEMENT PCHST + PCHST = SIGN(ONE,ARG1) * SIGN(ONE,ARG2) + IF ((ARG1.EQ.ZERO) .OR. 
(ARG2.EQ.ZERO)) PCHST = ZERO +C + RETURN +C------------- LAST LINE OF PCHST FOLLOWS ------------------------------ + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/r9gmit.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/r9gmit.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,84 @@ +*DECK R9GMIT + FUNCTION R9GMIT (A, X, ALGAP1, SGNGAM, ALX) +C***BEGIN PROLOGUE R9GMIT +C***SUBSIDIARY +C***PURPOSE Compute Tricomi's incomplete Gamma function for small +C arguments. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C7E +C***TYPE SINGLE PRECISION (R9GMIT-S, D9GMIT-D) +C***KEYWORDS COMPLEMENTARY INCOMPLETE GAMMA FUNCTION, FNLIB, SMALL X, +C SPECIAL FUNCTIONS, TRICOMI +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C Compute Tricomi's incomplete gamma function for small X. +C +C***REFERENCES (NONE) +C***ROUTINES CALLED ALNGAM, R1MACH, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770701 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C 900720 Routine changed from user-callable to subsidiary. (WRB) +C***END PROLOGUE R9GMIT + SAVE EPS, BOT + DATA EPS, BOT / 2*0.0 / +C***FIRST EXECUTABLE STATEMENT R9GMIT + IF (EPS.EQ.0.0) EPS = 0.5*R1MACH(3) + IF (BOT.EQ.0.0) BOT = LOG(R1MACH(1)) +C + IF (X .LE. 
0.0) CALL XERMSG ('SLATEC', 'R9GMIT', + + 'X SHOULD BE GT 0', 1, 2) +C + MA = A + 0.5 + IF (A.LT.0.0) MA = A - 0.5 + AEPS = A - MA +C + AE = A + IF (A.LT.(-0.5)) AE = AEPS +C + T = 1.0 + TE = AE + S = T + DO 20 K=1,200 + FK = K + TE = -X*TE/FK + T = TE/(AE+FK) + S = S + T + IF (ABS(T).LT.EPS*ABS(S)) GO TO 30 + 20 CONTINUE + CALL XERMSG ('SLATEC', 'R9GMIT', + + 'NO CONVERGENCE IN 200 TERMS OF TAYLOR-S SERIES', 2, 2) +C + 30 IF (A.GE.(-0.5)) ALGS = -ALGAP1 + LOG(S) + IF (A.GE.(-0.5)) GO TO 60 +C + ALGS = -ALNGAM(1.0+AEPS) + LOG(S) + S = 1.0 + M = -MA - 1 + IF (M.EQ.0) GO TO 50 + T = 1.0 + DO 40 K=1,M + T = X*T/(AEPS-M-1+K) + S = S + T + IF (ABS(T).LT.EPS*ABS(S)) GO TO 50 + 40 CONTINUE +C + 50 R9GMIT = 0.0 + ALGS = -MA*LOG(X) + ALGS + IF (S.EQ.0.0 .OR. AEPS.EQ.0.0) GO TO 60 +C + SGNG2 = SGNGAM*SIGN(1.0,S) + ALG2 = -X - ALGAP1 + LOG(ABS(S)) +C + IF (ALG2.GT.BOT) R9GMIT = SGNG2*EXP(ALG2) + IF (ALGS.GT.BOT) R9GMIT = R9GMIT + EXP(ALGS) + RETURN +C + 60 R9GMIT = EXP(ALGS) + RETURN +C + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/r9lgic.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/r9lgic.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,53 @@ +*DECK R9LGIC + FUNCTION R9LGIC (A, X, ALX) +C***BEGIN PROLOGUE R9LGIC +C***SUBSIDIARY +C***PURPOSE Compute the log complementary incomplete Gamma function +C for large X and for A .LE. X. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C7E +C***TYPE SINGLE PRECISION (R9LGIC-S, D9LGIC-D) +C***KEYWORDS COMPLEMENTARY INCOMPLETE GAMMA FUNCTION, FNLIB, LARGE X, +C LOGARITHM, SPECIAL FUNCTIONS +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C Compute the log complementary incomplete gamma function for large X +C and for A .LE. X. +C +C***REFERENCES (NONE) +C***ROUTINES CALLED R1MACH, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770701 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. 
(BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C 900720 Routine changed from user-callable to subsidiary. (WRB) +C***END PROLOGUE R9LGIC + SAVE EPS + DATA EPS / 0.0 / +C***FIRST EXECUTABLE STATEMENT R9LGIC + IF (EPS.EQ.0.0) EPS = 0.5*R1MACH(3) +C + XPA = X + 1.0 - A + XMA = X - 1.0 - A +C + R = 0.0 + P = 1.0 + S = P + DO 10 K=1,200 + FK = K + T = FK*(A-FK)*(1.0+R) + R = -T/((XMA+2.0*FK)*(XPA+2.0*FK)+T) + P = R*P + S = S + P + IF (ABS(P).LT.EPS*S) GO TO 20 + 10 CONTINUE + CALL XERMSG ('SLATEC', 'R9LGIC', + + 'NO CONVERGENCE IN 200 TERMS OF CONTINUED FRACTION', 1, 2) +C + 20 R9LGIC = A*ALX - X + LOG(S/XPA) +C + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/r9lgit.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/r9lgit.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,61 @@ +*DECK R9LGIT + FUNCTION R9LGIT (A, X, ALGAP1) +C***BEGIN PROLOGUE R9LGIT +C***SUBSIDIARY +C***PURPOSE Compute the logarithm of Tricomi's incomplete Gamma +C function with Perron's continued fraction for large X and +C A .GE. X. +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C7E +C***TYPE SINGLE PRECISION (R9LGIT-S, D9LGIT-D) +C***KEYWORDS FNLIB, INCOMPLETE GAMMA FUNCTION, LOGARITHM, +C PERRON'S CONTINUED FRACTION, SPECIAL FUNCTIONS, TRICOMI +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C Compute the log of Tricomi's incomplete gamma function with Perron's +C continued fraction for large X and for A .GE. X. +C +C***REFERENCES (NONE) +C***ROUTINES CALLED R1MACH, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770701 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. (WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C 900720 Routine changed from user-callable to subsidiary. 
(WRB) +C***END PROLOGUE R9LGIT + SAVE EPS, SQEPS + DATA EPS, SQEPS / 2*0.0 / +C***FIRST EXECUTABLE STATEMENT R9LGIT + IF (EPS.EQ.0.0) EPS = 0.5*R1MACH(3) + IF (SQEPS.EQ.0.0) SQEPS = SQRT(R1MACH(4)) +C + IF (X .LE. 0.0 .OR. A .LT. X) CALL XERMSG ('SLATEC', 'R9LGIT', + + 'X SHOULD BE GT 0.0 AND LE A', 2, 2) +C + AX = A + X + A1X = AX + 1.0 + R = 0.0 + P = 1.0 + S = P + DO 20 K=1,200 + FK = K + T = (A+FK)*X*(1.0+R) + R = T/((AX+FK)*(A1X+FK)-T) + P = R*P + S = S + P + IF (ABS(P).LT.EPS*S) GO TO 30 + 20 CONTINUE + CALL XERMSG ('SLATEC', 'R9LGIT', + + 'NO CONVERGENCE IN 200 TERMS OF CONTINUED FRACTION', 3, 2) +C + 30 HSTAR = 1.0 - X*S/A1X + IF (HSTAR .LT. SQEPS) CALL XERMSG ('SLATEC', 'R9LGIT', + + 'RESULT LESS THAN HALF PRECISION', 1, 1) +C + R9LGIT = -X - ALGAP1 - LOG(HSTAR) +C + RETURN + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/r9lgmc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/r9lgmc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,66 @@ +*DECK R9LGMC + FUNCTION R9LGMC (X) +C***BEGIN PROLOGUE R9LGMC +C***SUBSIDIARY +C***PURPOSE Compute the log Gamma correction factor so that +C LOG(GAMMA(X)) = LOG(SQRT(2*PI)) + (X-.5)*LOG(X) - X +C + R9LGMC(X). +C***LIBRARY SLATEC (FNLIB) +C***CATEGORY C7E +C***TYPE SINGLE PRECISION (R9LGMC-S, D9LGMC-D, C9LGMC-C) +C***KEYWORDS COMPLETE GAMMA FUNCTION, CORRECTION TERM, FNLIB, +C LOG GAMMA, LOGARITHM, SPECIAL FUNCTIONS +C***AUTHOR Fullerton, W., (LANL) +C***DESCRIPTION +C +C Compute the log gamma correction factor for X .GE. 10.0 so that +C LOG (GAMMA(X)) = LOG(SQRT(2*PI)) + (X-.5)*LOG(X) - X + R9LGMC(X) +C +C Series for ALGM on the interval 0. to 1.00000D-02 +C with weighted error 3.40E-16 +C log weighted error 15.47 +C significant figures required 14.39 +C decimal places required 15.86 +C +C***REFERENCES (NONE) +C***ROUTINES CALLED CSEVL, INITS, R1MACH, XERMSG +C***REVISION HISTORY (YYMMDD) +C 770801 DATE WRITTEN +C 890531 Changed all specific intrinsics to generic. 
(WRB) +C 890531 REVISION DATE from Version 3.2 +C 891214 Prologue converted to Version 4.0 format. (BAB) +C 900315 CALLs to XERROR changed to CALLs to XERMSG. (THJ) +C 900720 Routine changed from user-callable to subsidiary. (WRB) +C***END PROLOGUE R9LGMC + DIMENSION ALGMCS(6) + LOGICAL FIRST + SAVE ALGMCS, NALGM, XBIG, XMAX, FIRST + DATA ALGMCS( 1) / .1666389480 45186E0 / + DATA ALGMCS( 2) / -.0000138494 817606E0 / + DATA ALGMCS( 3) / .0000000098 108256E0 / + DATA ALGMCS( 4) / -.0000000000 180912E0 / + DATA ALGMCS( 5) / .0000000000 000622E0 / + DATA ALGMCS( 6) / -.0000000000 000003E0 / + DATA FIRST /.TRUE./ +C***FIRST EXECUTABLE STATEMENT R9LGMC + IF (FIRST) THEN + NALGM = INITS (ALGMCS, 6, R1MACH(3)) + XBIG = 1.0/SQRT(R1MACH(3)) + XMAX = EXP (MIN(LOG(R1MACH(2)/12.0), -LOG(12.0*R1MACH(1))) ) + ENDIF + FIRST = .FALSE. +C + IF (X .LT. 10.0) CALL XERMSG ('SLATEC', 'R9LGMC', + + 'X MUST BE GE 10', 1, 2) + IF (X.GE.XMAX) GO TO 20 +C + R9LGMC = 1.0/(12.0*X) + IF (X.LT.XBIG) R9LGMC = CSEVL (2.0*(10./X)**2-1., ALGMCS, NALGM)/X + RETURN +C + 20 R9LGMC = 0.0 + CALL XERMSG ('SLATEC', 'R9LGMC', 'X SO BIG R9LGMC UNDERFLOWS', 2, + + 1) + RETURN +C + END diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/xacosh.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/xacosh.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,6 @@ + subroutine xsacosh (x, result) + external acosh + real x, result, dacosh + result = acosh (x) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/xasinh.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/xasinh.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,6 @@ + subroutine xasinh (x, result) + external asinh + real x, result, dasinh + result = asinh (x) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/xatanh.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/xatanh.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,6 @@ + subroutine xatanh (x, result) + external 
atanh + real x, result, atanh + result = atanh (x) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/xbetai.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/xbetai.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,6 @@ + subroutine xbetai (x, a, b, result) + external betai + real x, a, b, result, betai + result = betai (x, a, b) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/xerf.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/xerf.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,6 @@ + subroutine xerf (x, result) + external erf + real x, result, erf + result = erf (x) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/xerfc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/xerfc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,6 @@ + subroutine xerfc (x, result) + external erfc + real x, result, erfc + result = erfc (x) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/xgamma.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/xgamma.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,6 @@ + subroutine xgamma (x, result) + external gamma + real x, result, gamma + result = gamma (x) + return + end diff -r 45f5faba05a2 -r 82be108cc558 libcruft/slatec-fn/xsgmainc.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libcruft/slatec-fn/xsgmainc.f Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,100 @@ + subroutine xsgammainc (a, x, result) + +c -- jwe, based on GAMIT. +c +c -- Do a better job than gami for large values of x. + + real a, x, result + intrinsic exp, log, sqrt, sign, aint + external gami, alngam, r9lgit, r9lgic, r9gmit + +C external gamr +C real GAMR + + REAL AEPS, AINTA, ALGAP1, ALNEPS, ALNG, ALX, + $ BOT, H, SGA, SGNGAM, SQEPS, T, R1MACH, R9GMIT, + $ R9LGIC, R9LGIT, ALNGAM, GAMI + + LOGICAL FIRST + + SAVE ALNEPS, SQEPS, BOT, FIRST + + DATA FIRST /.TRUE./ + + if (x .eq. 0.0e0) then + + if (a .eq. 
0.0e0) then + result = 1.0e0 + else + result = 0.0e0 + endif + + else + + IF (FIRST) THEN + ALNEPS = -LOG (R1MACH(3)) + SQEPS = SQRT(R1MACH(4)) + BOT = LOG (R1MACH(1)) + ENDIF + FIRST = .FALSE. +C + IF (X .LT. 0.E0) CALL XERMSG ('SLATEC', 'XGMAINC', 'X IS NEGATIVE' + + , 2, 2) +C + IF (X.NE.0.E0) ALX = LOG (X) + SGA = 1.0E0 + IF (A.NE.0.E0) SGA = SIGN (1.0E0, A) + AINTA = AINT (A + 0.5E0*SGA) + AEPS = A - AINTA +C +C IF (X.GT.0.E0) GO TO 20 +C GAMIT = 0.0E0 +C IF (AINTA.GT.0.E0 .OR. AEPS.NE.0.E0) GAMIT = GAMR(A+1.0E0) +C RETURN +C + 20 IF (X.GT.1.E0) GO TO 30 + IF (A.GE.(-0.5E0) .OR. AEPS.NE.0.E0) CALL DLGAMS (A+1.0E0, ALGAP1, + 1 SGNGAM) +C GAMIT = R9GMIT (A, X, ALGAP1, SGNGAM, ALX) + result = exp (a*alx + log (R9GMIT (A, X, ALGAP1, SGNGAM, ALX))) + RETURN +C + 30 IF (A.LT.X) GO TO 40 + T = R9LGIT (A, X, ALNGAM(A+1.0E0)) + IF (T.LT.BOT) CALL XERCLR +C GAMIT = EXP (T) + result = EXP (a*alx + T) + RETURN +C + 40 ALNG = R9LGIC (A, X, ALX) +C +C EVALUATE GAMIT IN TERMS OF LOG (DGAMIC (A, X)) +C + H = 1.0E0 + IF (AEPS.EQ.0.E0 .AND. AINTA.LE.0.E0) GO TO 50 +C + CALL DLGAMS (A+1.0E0, ALGAP1, SGNGAM) + T = LOG (ABS(A)) + ALNG - ALGAP1 + IF (T.GT.ALNEPS) GO TO 60 +C + IF (T.GT.(-ALNEPS)) H = 1.0E0 - SGA * SGNGAM * EXP(T) + IF (ABS(H).GT.SQEPS) GO TO 50 +C + CALL XERCLR + CALL XERMSG ('SLATEC', 'XGMAINC', 'RESULT LT HALF PRECISION', 1, + + 1) +C +C 50 T = -A*ALX + LOG(ABS(H)) +C IF (T.LT.BOT) CALL XERCLR +C GAMIT = SIGN (EXP(T), H) + 50 result = H + RETURN +C +C 60 T = T - A*ALX + 60 IF (T.LT.BOT) CALL XERCLR + result = -SGA * SGNGAM * EXP(T) + RETURN + + endif + return + end diff -r 45f5faba05a2 -r 82be108cc558 liboctave/Array-f.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/Array-f.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,409 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2003, 2004, + 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. 
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +// Instantiate Arrays of float values. + +#include "Array.h" +#include "Array.cc" +#include "oct-sort.cc" + +#if defined (HAVE_IEEE754_DATA_FORMAT) + +static inline uint32_t +FloatFlip (uint32_t f) +{ + uint32_t mask + = -static_cast(f >> 31) | 0x80000000UL; + + return f ^ mask; +} + +static inline uint32_t +IFloatFlip (uint32_t f) +{ + uint32_t mask = ((f >> 31) - 1) | 0x80000000UL; + + return f ^ mask; +} + +template <> +bool +ascending_compare (float a, float b) +{ + return (xisnan (b) || (a < b)); +} + +template <> +bool +ascending_compare (vec_index *a, vec_index *b) +{ + return (xisnan (b->vec) || (a->vec < b->vec)); +} + +template <> +bool +descending_compare (float a, float b) +{ + return (xisnan (a) || (a > b)); +} + +template <> +bool +descending_compare (vec_index *a, vec_index *b) +{ + return (xisnan (b->vec) || (a->vec > b->vec)); +} + +INSTANTIATE_ARRAY_SORT (uint32_t); + +template <> +Array +Array::sort (octave_idx_type dim, sortmode mode) const +{ + Array m = *this; + + dim_vector dv = m.dims (); + + if (m.length () < 1) + return m; + + octave_idx_type ns = dv(dim); + octave_idx_type iter = dv.numel () / ns; + octave_idx_type stride = 1; + for (int i = 0; i < dim; i++) + stride *= dv(i); + + float *v = m.fortran_vec (); + + uint32_t *p = reinterpret_cast (v); + + octave_sort lsort; + + if (mode == ASCENDING) + 
lsort.set_compare (ascending_compare); + else if (mode == DESCENDING) + lsort.set_compare (descending_compare); + else + abort (); + + if (stride == 1) + { + for (octave_idx_type j = 0; j < iter; j++) + { + // Flip the data in the vector so that int compares on + // IEEE754 give the correct ordering. + + for (octave_idx_type i = 0; i < ns; i++) + p[i] = FloatFlip (p[i]); + + lsort.sort (p, ns); + + // Flip the data out of the vector so that int compares + // on IEEE754 give the correct ordering. + + for (octave_idx_type i = 0; i < ns; i++) + p[i] = IFloatFlip (p[i]); + + // There are two representations of NaN. One will be + // sorted to the beginning of the vector and the other + // to the end. If it will be sorted incorrectly, fix + // things up. + + if (lo_ieee_signbit (octave_Float_NaN)) + { + if (mode == ASCENDING) + { + octave_idx_type i = 0; + float *vtmp = reinterpret_cast (p); + while (xisnan (vtmp[i++]) && i < ns); + for (octave_idx_type l = 0; l < ns - i + 1; l++) + vtmp[l] = vtmp[l+i-1]; + for (octave_idx_type l = ns - i + 1; l < ns; l++) + vtmp[l] = octave_Float_NaN; + } + else + { + octave_idx_type i = ns; + float *vtmp = reinterpret_cast (p); + while (xisnan (vtmp[--i]) && i > 0); + for (octave_idx_type l = i; l >= 0; l--) + vtmp[l-i+ns-1] = vtmp[l]; + for (octave_idx_type l = 0; l < ns - i - 1; l++) + vtmp[l] = octave_Float_NaN; + } + } + + p += ns; + } + } + else + { + OCTAVE_LOCAL_BUFFER (uint32_t, vi, ns); + + for (octave_idx_type j = 0; j < iter; j++) + { + octave_idx_type offset = j; + octave_idx_type offset2 = 0; + while (offset >= stride) + { + offset -= stride; + offset2++; + } + offset += offset2 * stride * ns; + + // Flip the data in the vector so that int compares on + // IEEE754 give the correct ordering. + + for (octave_idx_type i = 0; i < ns; i++) + vi[i] = FloatFlip (p[i*stride + offset]); + + lsort.sort (vi, ns); + + // Flip the data out of the vector so that int compares + // on IEEE754 give the correct ordering. 
+ + for (octave_idx_type i = 0; i < ns; i++) + p[i*stride + offset] = IFloatFlip (vi[i]); + + // There are two representations of NaN. One will be + // sorted to the beginning of the vector and the other + // to the end. If it will be sorted to the beginning, + // fix things up. + + if (lo_ieee_signbit (octave_Float_NaN)) + { + if (mode == ASCENDING) + { + octave_idx_type i = 0; + while (xisnan (v[i++*stride + offset]) && i < ns); + for (octave_idx_type l = 0; l < ns - i + 1; l++) + v[l*stride + offset] = v[(l+i-1)*stride + offset]; + for (octave_idx_type l = ns - i + 1; l < ns; l++) + v[l*stride + offset] = octave_Float_NaN; + } + else + { + octave_idx_type i = ns; + while (xisnan (v[--i*stride + offset]) && i > 0); + for (octave_idx_type l = i; l >= 0; l--) + v[(l-i+ns-1)*stride + offset] = v[l*stride + offset]; + for (octave_idx_type l = 0; l < ns - i - 1; l++) + v[l*stride + offset] = octave_Float_NaN; + } + } + } + } + + return m; +} + +template <> +Array +Array::sort (Array &sidx, octave_idx_type dim, + sortmode mode) const +{ + Array m = *this; + + dim_vector dv = m.dims (); + + if (m.length () < 1) + { + sidx = Array (dv); + return m; + } + + octave_idx_type ns = dv(dim); + octave_idx_type iter = dv.numel () / ns; + octave_idx_type stride = 1; + for (int i = 0; i < dim; i++) + stride *= dv(i); + + float *v = m.fortran_vec (); + + uint32_t *p = reinterpret_cast (v); + + octave_sort *> indexed_sort; + + if (mode == ASCENDING) + indexed_sort.set_compare (ascending_compare); + else if (mode == DESCENDING) + indexed_sort.set_compare (descending_compare); + else + abort (); + + OCTAVE_LOCAL_BUFFER (vec_index *, vi, ns); + OCTAVE_LOCAL_BUFFER (vec_index, vix, ns); + + for (octave_idx_type i = 0; i < ns; i++) + vi[i] = &vix[i]; + + sidx = Array (dv); + + for (octave_idx_type j = 0; j < iter; j++) + { + octave_idx_type offset = j; + octave_idx_type offset2 = 0; + while (offset >= stride) + { + offset -= stride; + offset2++; + } + offset += offset2 * stride * ns; + + 
// Flip the data in the vector so that int compares on + // IEEE754 give the correct ordering. + + for (octave_idx_type i = 0; i < ns; i++) + { + vi[i]->vec = FloatFlip (p[i*stride + offset]); + vi[i]->indx = i; + } + + indexed_sort.sort (vi, ns); + + // Flip the data out of the vector so that int compares on + // IEEE754 give the correct ordering + + for (octave_idx_type i = 0; i < ns; i++) + { + p[i*stride + offset] = IFloatFlip (vi[i]->vec); + sidx(i*stride + offset) = vi[i]->indx; + } + + // There are two representations of NaN. One will be sorted + // to the beginning of the vector and the other to the end. + // If it will be sorted to the beginning, fix things up. + + if (lo_ieee_signbit (octave_Float_NaN)) + { + if (mode == ASCENDING) + { + octave_idx_type i = 0; + while (xisnan (v[i++*stride+offset]) && i < ns); + OCTAVE_LOCAL_BUFFER (float, itmp, i - 1); + for (octave_idx_type l = 0; l < i -1; l++) + itmp[l] = sidx(l*stride + offset); + for (octave_idx_type l = 0; l < ns - i + 1; l++) + { + v[l*stride + offset] = v[(l+i-1)*stride + offset]; + sidx(l*stride + offset) = sidx((l+i-1)*stride + offset); + } + for (octave_idx_type k = 0, l = ns - i + 1; l < ns; l++, k++) + { + v[l*stride + offset] = octave_Float_NaN; + sidx(l*stride + offset) = + static_cast(itmp[k]); + } + } + else + { + octave_idx_type i = ns; + while (xisnan (v[--i*stride+offset]) && i > 0); + OCTAVE_LOCAL_BUFFER (float, itmp, ns - i - 1); + for (octave_idx_type l = 0; l < ns - i -1; l++) + itmp[l] = sidx((l+i+1)*stride + offset); + for (octave_idx_type l = i; l >= 0; l--) + { + v[(l-i+ns-1)*stride + offset] = v[l*stride + offset]; + sidx((l-i+ns-1)*stride + offset) = sidx(l*stride + offset); + } + for (octave_idx_type k = 0, l = 0; l < ns - i - 1; l++, k++) + { + v[l*stride + offset] = octave_Float_NaN; + sidx(l*stride + offset) = + static_cast(itmp[k]); + } + } + } + } + + return m; +} + +#else + +template <> +bool +ascending_compare (float a, float b) +{ + return (xisnan (b) || (a < b)); 
+} + +template <> +bool +ascending_compare (vec_index *a, + vec_index *b) +{ + return (xisnan (b->vec) || (a->vec < b->vec)); +} + +template <> +bool +descending_compare (float a, float b) +{ + return (xisnan (a) || (a > b)); +} + +template <> +bool +descending_compare (vec_index *a, + vec_index *b) +{ + return (xisnan (b->vec) || (a->vec > b->vec)); +} + +INSTANTIATE_ARRAY_SORT (float); + +#endif + +INSTANTIATE_ARRAY_AND_ASSIGN (float, OCTAVE_API); + +INSTANTIATE_ARRAY_ASSIGN (float, int, OCTAVE_API); +INSTANTIATE_ARRAY_ASSIGN (float, short, OCTAVE_API); +INSTANTIATE_ARRAY_ASSIGN (float, char, OCTAVE_API); + +#include "Array2.h" + +template class OCTAVE_API Array2; + +#include "ArrayN.h" +#include "ArrayN.cc" + +template class OCTAVE_API ArrayN; + +template OCTAVE_API std::ostream& operator << (std::ostream&, const ArrayN&); + +#include "DiagArray2.h" +#include "DiagArray2.cc" + +template class OCTAVE_API DiagArray2; + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/Array-fC.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/Array-fC.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,124 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1998, 2001, 2003, 2004, 2005, + 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +// Instantiate Arrays of FloatComplex values. + +#include "oct-cmplx.h" + +#include "Array.h" +#include "Array.cc" + +static float +xabs (const FloatComplex& x) +{ + return (xisinf (x.real ()) || xisinf (x.imag ())) ? octave_Float_Inf : abs (x); +} + +static bool +operator < (const FloatComplex& a, const FloatComplex& b) +{ + return (xisnan (b) || (xabs (a) < xabs (b)) + || ((xabs (a) == xabs (b)) && (arg (a) < arg (b)))); +} + +static bool +operator > (const FloatComplex& a, const FloatComplex& b) +{ + return (xisnan (a) || (xabs (a) > xabs (b)) + || ((xabs (a) == xabs (b)) && (arg (a) > arg (b)))); +} + +// This file must be included after the < and > operators are +// defined to avoid errors with the Intel C++ compiler. +#include "oct-sort.cc" + +template <> +bool +ascending_compare (FloatComplex a, FloatComplex b) +{ + return (xisnan (b) || (xabs (a) < xabs (b)) + || ((xabs (a) == xabs (b)) && (arg (a) < arg (b)))); +} + +template <> +bool +ascending_compare (vec_index *a, vec_index *b) +{ + return (xisnan (b->vec) + || (xabs (a->vec) < xabs (b->vec)) + || ((xabs (a->vec) == xabs (b->vec)) + && (arg (a->vec) < arg (b->vec)))); +} + +template <> +bool +descending_compare (FloatComplex a, FloatComplex b) +{ + return (xisnan (a) || (xabs (a) > xabs (b)) + || ((xabs (a) == xabs (b)) && (arg (a) > arg (b)))); +} + +template <> +bool +descending_compare (vec_index *a, vec_index *b) +{ + return (xisnan (a->vec) + || (xabs (a->vec) > xabs (b->vec)) + || ((xabs (a->vec) == xabs (b->vec)) + && (arg (a->vec) > arg (b->vec)))); +} + +INSTANTIATE_ARRAY_SORT (FloatComplex); + +INSTANTIATE_ARRAY_AND_ASSIGN (FloatComplex, OCTAVE_API); + +INSTANTIATE_ARRAY_ASSIGN (FloatComplex, float, OCTAVE_API); +INSTANTIATE_ARRAY_ASSIGN (FloatComplex, int, OCTAVE_API); +INSTANTIATE_ARRAY_ASSIGN (FloatComplex, short, OCTAVE_API); +INSTANTIATE_ARRAY_ASSIGN (FloatComplex, char, OCTAVE_API); + +#include "Array2.h" + +template class OCTAVE_API 
Array2; + +#include "ArrayN.h" +#include "ArrayN.cc" + +template class OCTAVE_API ArrayN; + +template OCTAVE_API std::ostream& operator << (std::ostream&, const ArrayN&); + +#include "DiagArray2.h" +#include "DiagArray2.cc" + +template class OCTAVE_API DiagArray2; + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/Array.cc --- a/liboctave/Array.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/Array.cc Sun Apr 27 22:34:17 2008 +0200 @@ -1203,7 +1203,48 @@ octave_idx_type nr = dim1 (); octave_idx_type nc = dim2 (); - if (nr > 1 && nc > 1) + if (nr >= 8 && nc >= 8) + { + Array result (dim_vector (nc, nr)); + + // Blocked transpose to attempt to avoid cache misses. + + // Don't use OCTAVE_LOCAL_BUFFER here as it doesn't work with bool + // on some compilers. + T buf[64]; + + octave_idx_type ii = 0, jj; + for (jj = 0; jj < (nc - 8 + 1); jj += 8) + { + for (ii = 0; ii < (nr - 8 + 1); ii += 8) + { + // Copy to buffer + for (octave_idx_type j = jj, k = 0, idxj = jj * nr; + j < jj + 8; j++, idxj += nr) + for (octave_idx_type i = ii; i < ii + 8; i++) + buf[k++] = xelem (i + idxj); + + // Copy from buffer + for (octave_idx_type i = ii, idxi = ii * nc; i < ii + 8; + i++, idxi += nc) + for (octave_idx_type j = jj, k = i - ii; j < jj + 8; + j++, k+=8) + result.xelem (j + idxi) = buf[k]; + } + + if (ii < nr) + for (octave_idx_type j = jj; j < jj + 8; j++) + for (octave_idx_type i = ii; i < nr; i++) + result.xelem (j, i) = xelem (i, j); + } + + for (octave_idx_type j = jj; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + result.xelem (j, i) = xelem (i, j); + + return result; + } + else if (nr > 1 && nc > 1) { Array result (dim_vector (nc, nr)); @@ -1221,6 +1262,103 @@ } template +Array +Array::hermitian (T (*fcn) (const T&)) const +{ + assert (ndims () == 2); + + octave_idx_type nr = dim1 (); + octave_idx_type nc = dim2 (); + + if (nr >= 8 && nc >= 8) + { + Array result (dim_vector (nc, nr)); + + // Blocked 
transpose to attempt to avoid cache misses. + + // Don't use OCTAVE_LOCAL_BUFFER here as it doesn't work with bool + // on some compilers. + T buf[64]; + + octave_idx_type ii = 0, jj; + for (jj = 0; jj < (nc - 8 + 1); jj += 8) + { + for (ii = 0; ii < (nr - 8 + 1); ii += 8) + { + // Copy to buffer + for (octave_idx_type j = jj, k = 0, idxj = jj * nr; + j < jj + 8; j++, idxj += nr) + for (octave_idx_type i = ii; i < ii + 8; i++) + buf[k++] = xelem (i + idxj); + + // Copy from buffer + for (octave_idx_type i = ii, idxi = ii * nc; i < ii + 8; + i++, idxi += nc) + for (octave_idx_type j = jj, k = i - ii; j < jj + 8; + j++, k+=8) + result.xelem (j + idxi) = fcn (buf[k]); + } + + if (ii < nr) + for (octave_idx_type j = jj; j < jj + 8; j++) + for (octave_idx_type i = ii; i < nr; i++) + result.xelem (j, i) = fcn (xelem (i, j)); + } + + for (octave_idx_type j = jj; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + result.xelem (j, i) = fcn (xelem (i, j)); + + return result; + } + else + { + Array result (dim_vector (nc, nr)); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + result.xelem (j, i) = fcn (xelem (i, j)); + + return result; + } +} + +/* + +%% Tranpose tests for matrices of the tile size and plus or minus a row +%% and with four tiles. + +%!shared m7, mt7, m8, mt8, m9, mt9 +%! m7 = reshape (1 : 7*8, 8, 7); +%! mt7 = [1:7; 1:7, 1:7, 1:7, 1:7; 1:7, 1:7, 1:7]; +%! m8 = reshape (1 : 8*8, 8, 8); +%! mt8 = [1:8; 1:8, 1:8, 1:8, 1:8; 1:8, 1:8, 1:8]; +%! m9 = reshape (1 : 9*8, 8, 9); +%! 
mt9 = [1:9; 1:9, 1:9, 1:9, 1:9; 1:9, 1:9, 1:9]; + +%!assert (m7', mt7) +%!assert ((1i*m7).', 1i * mt7) +%!assert ((1i*m7)', conj (1i * mt7)) +%!assert (m8', mt8) +%!assert ((1i*m8).', 1i * mt8) +%!assert ((1i*m8)', conj (1i * mt8)) +%!assert (m9', mt9) +%!assert ((1i*m9).', 1i * mt9) +%!assert ((1i*m9)', conj (1i * mt9)) + +%!assert ([m7, m7; m8, m8]', [mt7, mt8; mt7, mt8]) +%!assert ((1i*[m7, m7; m8, m8]).', 1i * [mt7, mt8; mt7, mt8]) +%!assert ((1i*[m7, m7; m8, m8])', conj (1i * [mt7, mt8; mt7, mt8])) +%!assert ([m8, m8; m8, m8]', [mt8, mt8; mt8, mt8]) +%!assert ((1i*[m8, m8; m8, m8]).', 1i * [mt8, mt8; mt8, mt8]) +%!assert ((1i*[m8, m8; m8, m8])', conj (1i * [mt8, mt8; mt8, mt8])) +%!assert ([m9, m9; m8, m8]', [mt9, mt8; mt9, mt8]) +%!assert ((1i*[m9, m9; m8, m8]).', 1i * [mt9, mt8; mt9, mt8]) +%!assert ((1i*[m9, m9; m8, m8])', conj (1i * [mt9, mt8; mt9, mt8])) + +*/ + +template T * Array::fortran_vec (void) { diff -r 45f5faba05a2 -r 82be108cc558 liboctave/Array.h --- a/liboctave/Array.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/Array.h Sun Apr 27 22:34:17 2008 +0200 @@ -461,6 +461,7 @@ bool is_empty (void) const { return numel () == 0; } Array transpose (void) const; + Array hermitian (T (*fcn) (const T&) = 0) const; const T *data (void) const { return rep->data; } diff -r 45f5faba05a2 -r 82be108cc558 liboctave/Array2.h --- a/liboctave/Array2.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/Array2.h Sun Apr 27 22:34:17 2008 +0200 @@ -109,6 +109,12 @@ return Array2 (tmp, tmp.rows (), tmp.columns ()); } + Array2 hermitian (T (*fcn) (const T&) = 0) const + { + Array tmp = Array::hermitian (fcn); + return Array2 (tmp, tmp.rows (), tmp.columns ()); + } + Array2 index (idx_vector& i, int resize_ok = 0, const T& rfv = resize_fill_value (T ())) const { diff -r 45f5faba05a2 -r 82be108cc558 liboctave/ArrayN.h --- a/liboctave/ArrayN.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/ArrayN.h Sun Apr 27 22:34:17 2008 +0200 @@ -102,6 +102,7 @@ ArrayN squeeze (void) const 
{ return Array::squeeze (); } ArrayN transpose (void) const { return Array::transpose (); } + ArrayN hermitian (T (*fcn) (const T&) = 0) const { return Array::hermitian (fcn); } ArrayN& insert (const ArrayN& a, const dim_vector& dv) { diff -r 45f5faba05a2 -r 82be108cc558 liboctave/CColVector.cc --- a/liboctave/CColVector.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/CColVector.cc Sun Apr 27 22:34:17 2008 +0200 @@ -221,17 +221,16 @@ return retval; } -ComplexRowVector +ComplexRowVector ComplexColumnVector::hermitian (void) const -{ - octave_idx_type len = length (); - return ComplexRowVector (mx_inline_conj_dup (data (), len), len); +{ + return MArray::hermitian (std::conj); } ComplexRowVector ComplexColumnVector::transpose (void) const { - return ComplexRowVector (*this); + return MArray::transpose (); } ComplexColumnVector diff -r 45f5faba05a2 -r 82be108cc558 liboctave/CColVector.h --- a/liboctave/CColVector.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/CColVector.h Sun Apr 27 22:34:17 2008 +0200 @@ -72,7 +72,7 @@ ComplexColumnVector stack (const ColumnVector& a) const; ComplexColumnVector stack (const ComplexColumnVector& a) const; - ComplexRowVector hermitian (void) const; // complex conjugate transpose. 
+ ComplexRowVector hermitian (void) const; ComplexRowVector transpose (void) const; friend ComplexColumnVector conj (const ComplexColumnVector& a); diff -r 45f5faba05a2 -r 82be108cc558 liboctave/CDiagMatrix.cc --- a/liboctave/CDiagMatrix.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/CDiagMatrix.cc Sun Apr 27 22:34:17 2008 +0200 @@ -233,20 +233,6 @@ } ComplexDiagMatrix -ComplexDiagMatrix::hermitian (void) const -{ - return ComplexDiagMatrix (mx_inline_conj_dup (data (), length ()), - cols (), rows ()); -} - -ComplexDiagMatrix -ComplexDiagMatrix::transpose (void) const -{ - return ComplexDiagMatrix (mx_inline_dup (data (), length ()), - cols (), rows ()); -} - -ComplexDiagMatrix conj (const ComplexDiagMatrix& a) { ComplexDiagMatrix retval; diff -r 45f5faba05a2 -r 82be108cc558 liboctave/CDiagMatrix.h --- a/liboctave/CDiagMatrix.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/CDiagMatrix.h Sun Apr 27 22:34:17 2008 +0200 @@ -87,8 +87,8 @@ ComplexDiagMatrix& fill (const RowVector& a, octave_idx_type beg); ComplexDiagMatrix& fill (const ComplexRowVector& a, octave_idx_type beg); - ComplexDiagMatrix hermitian (void) const; // complex conjugate transpose - ComplexDiagMatrix transpose (void) const; + ComplexDiagMatrix hermitian (void) const { return MDiagArray2::hermitian (std::conj); } + ComplexDiagMatrix transpose (void) const { return MDiagArray2::transpose(); } friend ComplexDiagMatrix conj (const ComplexDiagMatrix& a); diff -r 45f5faba05a2 -r 82be108cc558 liboctave/CMatrix.cc --- a/liboctave/CMatrix.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/CMatrix.cc Sun Apr 27 22:34:17 2008 +0200 @@ -191,13 +191,13 @@ // each subroutine. 
F77_RET_T - F77_FUNC (cffti, CFFTI) (const octave_idx_type&, Complex*); + F77_FUNC (zffti, ZFFTI) (const octave_idx_type&, Complex*); F77_RET_T - F77_FUNC (cfftf, CFFTF) (const octave_idx_type&, Complex*, Complex*); + F77_FUNC (zfftf, ZFFTF) (const octave_idx_type&, Complex*, Complex*); F77_RET_T - F77_FUNC (cfftb, CFFTB) (const octave_idx_type&, Complex*, Complex*); + F77_FUNC (zfftb, ZFFTB) (const octave_idx_type&, Complex*, Complex*); F77_RET_T F77_FUNC (zlartg, ZLARTG) (const Complex&, const Complex&, @@ -885,22 +885,6 @@ } ComplexMatrix -ComplexMatrix::hermitian (void) const -{ - octave_idx_type nr = rows (); - octave_idx_type nc = cols (); - ComplexMatrix result; - if (length () > 0) - { - result.resize (nc, nr); - for (octave_idx_type j = 0; j < nc; j++) - for (octave_idx_type i = 0; i < nr; i++) - result.elem (j, i) = conj (elem (i, j)); - } - return result; -} - -ComplexMatrix conj (const ComplexMatrix& a) { octave_idx_type a_len = a.length (); @@ -1356,13 +1340,13 @@ retval = *this; Complex *tmp_data = retval.fortran_vec (); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type j = 0; j < nsamples; j++) { OCTAVE_QUIT; - F77_FUNC (cfftf, CFFTF) (npts, &tmp_data[npts*j], pwsave); + F77_FUNC (zfftf, ZFFTF) (npts, &tmp_data[npts*j], pwsave); } return retval; @@ -1397,13 +1381,13 @@ retval = *this; Complex *tmp_data = retval.fortran_vec (); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type j = 0; j < nsamples; j++) { OCTAVE_QUIT; - F77_FUNC (cfftb, CFFTB) (npts, &tmp_data[npts*j], pwsave); + F77_FUNC (zfftb, ZFFTB) (npts, &tmp_data[npts*j], pwsave); } for (octave_idx_type j = 0; j < npts*nsamples; j++) @@ -1441,13 +1425,13 @@ retval = *this; Complex *tmp_data = retval.fortran_vec (); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type j = 0; j < nsamples; j++) { OCTAVE_QUIT; - F77_FUNC (cfftf, CFFTF) 
(npts, &tmp_data[npts*j], pwsave); + F77_FUNC (zfftf, ZFFTF) (npts, &tmp_data[npts*j], pwsave); } npts = nc; @@ -1460,7 +1444,7 @@ Array tmp (npts); Complex *prow = tmp.fortran_vec (); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type j = 0; j < nsamples; j++) { @@ -1469,7 +1453,7 @@ for (octave_idx_type i = 0; i < npts; i++) prow[i] = tmp_data[i*nr + j]; - F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); + F77_FUNC (zfftf, ZFFTF) (npts, prow, pwsave); for (octave_idx_type i = 0; i < npts; i++) tmp_data[i*nr + j] = prow[i]; @@ -1507,13 +1491,13 @@ retval = *this; Complex *tmp_data = retval.fortran_vec (); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type j = 0; j < nsamples; j++) { OCTAVE_QUIT; - F77_FUNC (cfftb, CFFTB) (npts, &tmp_data[npts*j], pwsave); + F77_FUNC (zfftb, ZFFTB) (npts, &tmp_data[npts*j], pwsave); } for (octave_idx_type j = 0; j < npts*nsamples; j++) @@ -1529,7 +1513,7 @@ Array tmp (npts); Complex *prow = tmp.fortran_vec (); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type j = 0; j < nsamples; j++) { @@ -1538,7 +1522,7 @@ for (octave_idx_type i = 0; i < npts; i++) prow[i] = tmp_data[i*nr + j]; - F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); + F77_FUNC (zfftb, ZFFTB) (npts, prow, pwsave); for (octave_idx_type i = 0; i < npts; i++) tmp_data[i*nr + j] = prow[i] / static_cast (npts); diff -r 45f5faba05a2 -r 82be108cc558 liboctave/CMatrix.h --- a/liboctave/CMatrix.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/CMatrix.h Sun Apr 27 22:34:17 2008 +0200 @@ -54,7 +54,11 @@ ComplexMatrix (const ComplexMatrix& a) : MArray2 (a) { } - ComplexMatrix (const MArray2& a) : MArray2 (a) { } + template + ComplexMatrix (const MArray2& a) : MArray2 (a) { } + + template + ComplexMatrix (const Array2& a) : MArray2 (a) { } explicit ComplexMatrix (const Matrix& a); @@ -122,7 +126,8 @@ ComplexMatrix stack (const 
ComplexColumnVector& a) const; ComplexMatrix stack (const ComplexDiagMatrix& a) const; - ComplexMatrix hermitian (void) const; // complex conjugate transpose + ComplexMatrix hermitian (void) const + { return MArray2::hermitian (std::conj); } ComplexMatrix transpose (void) const { return MArray2::transpose (); } diff -r 45f5faba05a2 -r 82be108cc558 liboctave/CNDArray.cc --- a/liboctave/CNDArray.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/CNDArray.cc Sun Apr 27 22:34:17 2008 +0200 @@ -48,13 +48,13 @@ // each subroutine. F77_RET_T - F77_FUNC (cffti, CFFTI) (const octave_idx_type&, Complex*); + F77_FUNC (zffti, ZFFTI) (const octave_idx_type&, Complex*); F77_RET_T - F77_FUNC (cfftf, CFFTF) (const octave_idx_type&, Complex*, Complex*); + F77_FUNC (zfftf, ZFFTF) (const octave_idx_type&, Complex*, Complex*); F77_RET_T - F77_FUNC (cfftb, CFFTB) (const octave_idx_type&, Complex*, Complex*); + F77_FUNC (zfftb, ZFFTB) (const octave_idx_type&, Complex*, Complex*); } #endif @@ -218,7 +218,7 @@ octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); octave_idx_type dist = (stride == 1 ? npts : 1); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type k = 0; k < nloop; k++) { @@ -229,7 +229,7 @@ for (octave_idx_type i = 0; i < npts; i++) tmp[i] = elem((i + k*npts)*stride + j*dist); - F77_FUNC (cfftf, CFFTF) (npts, tmp, pwsave); + F77_FUNC (zfftf, ZFFTF) (npts, tmp, pwsave); for (octave_idx_type i = 0; i < npts; i++) retval ((i + k*npts)*stride + j*dist) = tmp[i]; @@ -265,7 +265,7 @@ octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); octave_idx_type dist = (stride == 1 ? 
npts : 1); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type k = 0; k < nloop; k++) { @@ -276,7 +276,7 @@ for (octave_idx_type i = 0; i < npts; i++) tmp[i] = elem((i + k*npts)*stride + j*dist); - F77_FUNC (cfftb, CFFTB) (npts, tmp, pwsave); + F77_FUNC (zfftb, ZFFTB) (npts, tmp, pwsave); for (octave_idx_type i = 0; i < npts; i++) retval ((i + k*npts)*stride + j*dist) = tmp[i] / @@ -311,7 +311,7 @@ octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); octave_idx_type dist = (stride == 1 ? npts : 1); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type k = 0; k < nloop; k++) { @@ -322,7 +322,7 @@ for (octave_idx_type l = 0; l < npts; l++) prow[l] = retval ((l + k*npts)*stride + j*dist); - F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); + F77_FUNC (zfftf, ZFFTF) (npts, prow, pwsave); for (octave_idx_type l = 0; l < npts; l++) retval ((l + k*npts)*stride + j*dist) = prow[l]; @@ -359,7 +359,7 @@ octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); octave_idx_type dist = (stride == 1 ? npts : 1); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type k = 0; k < nloop; k++) { @@ -370,7 +370,7 @@ for (octave_idx_type l = 0; l < npts; l++) prow[l] = retval ((l + k*npts)*stride + j*dist); - F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); + F77_FUNC (zfftb, ZFFTB) (npts, prow, pwsave); for (octave_idx_type l = 0; l < npts; l++) retval ((l + k*npts)*stride + j*dist) = prow[l] / @@ -407,7 +407,7 @@ octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); octave_idx_type dist = (stride == 1 ? 
npts : 1); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type k = 0; k < nloop; k++) { @@ -418,7 +418,7 @@ for (octave_idx_type l = 0; l < npts; l++) prow[l] = retval ((l + k*npts)*stride + j*dist); - F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); + F77_FUNC (zfftf, ZFFTF) (npts, prow, pwsave); for (octave_idx_type l = 0; l < npts; l++) retval ((l + k*npts)*stride + j*dist) = prow[l]; @@ -454,7 +454,7 @@ octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); octave_idx_type dist = (stride == 1 ? npts : 1); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type k = 0; k < nloop; k++) { @@ -465,7 +465,7 @@ for (octave_idx_type l = 0; l < npts; l++) prow[l] = retval ((l + k*npts)*stride + j*dist); - F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); + F77_FUNC (zfftb, ZFFTB) (npts, prow, pwsave); for (octave_idx_type l = 0; l < npts; l++) retval ((l + k*npts)*stride + j*dist) = prow[l] / diff -r 45f5faba05a2 -r 82be108cc558 liboctave/CNDArray.h --- a/liboctave/CNDArray.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/CNDArray.h Sun Apr 27 22:34:17 2008 +0200 @@ -46,7 +46,11 @@ ComplexNDArray (const ComplexMatrix& a) : MArrayN (a) { } - ComplexNDArray (const MArrayN& a) : MArrayN (a) { } + template + ComplexNDArray (const MArrayN& a) : MArrayN (a) { } + + template + ComplexNDArray (const ArrayN& a) : MArrayN (a) { } ComplexNDArray& operator = (const ComplexNDArray& a) { diff -r 45f5faba05a2 -r 82be108cc558 liboctave/CRowVector.cc --- a/liboctave/CRowVector.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/CRowVector.cc Sun Apr 27 22:34:17 2008 +0200 @@ -227,14 +227,13 @@ ComplexColumnVector ComplexRowVector::hermitian (void) const { - octave_idx_type len = length (); - return ComplexColumnVector (mx_inline_conj_dup (data (), len), len); + return MArray::hermitian (std::conj); } ComplexColumnVector ComplexRowVector::transpose (void) const { - return 
ComplexColumnVector (*this); + return MArray::transpose (); } ComplexRowVector diff -r 45f5faba05a2 -r 82be108cc558 liboctave/CRowVector.h --- a/liboctave/CRowVector.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/CRowVector.h Sun Apr 27 22:34:17 2008 +0200 @@ -70,7 +70,7 @@ ComplexRowVector append (const RowVector& a) const; ComplexRowVector append (const ComplexRowVector& a) const; - ComplexColumnVector hermitian (void) const; // complex conjugate transpose. + ComplexColumnVector hermitian (void) const; ComplexColumnVector transpose (void) const; friend ComplexRowVector conj (const ComplexRowVector& a); diff -r 45f5faba05a2 -r 82be108cc558 liboctave/ChangeLog --- a/liboctave/ChangeLog Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/ChangeLog Sun Apr 27 22:34:17 2008 +0200 @@ -1,3 +1,122 @@ +2008-05-20 David Bateman + + * Array.cc (Array Array::transpose () const): Modify for tiled + transpose to limit the number of cache misses. + (Array Array::hermitian (T (*)(const&)) const): New method + for matrix conjugate transpose. + * Array.h (Array hermitian (T (*)(const&)) const): Declare it. + + * DiagArray2.cc (DiagArray2 DiagArray2::transpose () const): + Specialization for diagonal arrays. + (DiagArray2 DiagArray2::transpose (T (*) (const&)) const): + Ditto. + + * MArray.h (MArray hermitian transpose () const): Ditto. + * MArray2.h (MArray2 hermitian hermitian hermitian transpose () const): Ditto. + (MDiagArray hermitian * CMatrix.cc (double rcond): Replace with double rcon everywhere diff -r 45f5faba05a2 -r 82be108cc558 liboctave/CmplxDET.cc --- a/liboctave/CmplxDET.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/CmplxDET.cc Sun Apr 27 22:34:17 2008 +0200 @@ -54,7 +54,7 @@ { if (c2 != 0.0) { - double etmp = e2 / xlog2 (10); + double etmp = e2 / xlog2 (static_cast(10)); e10 = static_cast (xround (etmp)); etmp -= e10; c10 = c2 * pow (10.0, etmp); @@ -76,7 +76,7 @@ Complex ComplexDET::value (void) const { - return base2 ? 
c2 * xexp2 (e2) : c10 * pow (10.0, e10); + return base2 ? c2 * xexp2 (static_cast(e2)) : c10 * pow (10.0, e10); } /* diff -r 45f5faba05a2 -r 82be108cc558 liboctave/DiagArray2.cc --- a/liboctave/DiagArray2.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/DiagArray2.cc Sun Apr 27 22:34:17 2008 +0200 @@ -34,6 +34,27 @@ #include "lo-error.h" +template +DiagArray2 +DiagArray2::transpose (void) const +{ + DiagArray2 retval (*this); + retval.dimensions = dim_vector (this->dim2 (), this->dim1 ()); + return retval; +} + +template +DiagArray2 +DiagArray2::hermitian (T (* fcn) (const T&)) const +{ + DiagArray2 retval (this->dim2 (), this->dim1 ()); + const T *p = this->data (); + T *q = retval.fortran_vec (); + for (octave_idx_type i = 0; i < this->length (); i++) + q [i] = fcn (p [i]); + return retval; +} + // A two-dimensional array with diagonal elements only. template diff -r 45f5faba05a2 -r 82be108cc558 liboctave/DiagArray2.h --- a/liboctave/DiagArray2.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/DiagArray2.h Sun Apr 27 22:34:17 2008 +0200 @@ -180,6 +180,9 @@ void resize (octave_idx_type n, octave_idx_type m, const T& val); void maybe_delete_elements (idx_vector& i, idx_vector& j); + + DiagArray2 transpose (void) const; + DiagArray2 hermitian (T (*fcn) (const T&) = 0) const; }; #endif diff -r 45f5faba05a2 -r 82be108cc558 liboctave/MArray-C.cc --- a/liboctave/MArray-C.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/MArray-C.cc Sun Apr 27 22:34:17 2008 +0200 @@ -44,7 +44,7 @@ OCTAVE_API double MArray::norm (double p) const { - MARRAY_NORM_BODY (Complex, xdznrm2, XDZNRM2); + MARRAY_NORM_BODY (Complex, double, xdznrm2, XDZNRM2, octave_NaN); } template class OCTAVE_API MArray; diff -r 45f5faba05a2 -r 82be108cc558 liboctave/MArray-d.cc --- a/liboctave/MArray-d.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/MArray-d.cc Sun Apr 27 22:34:17 2008 +0200 @@ -42,7 +42,7 @@ OCTAVE_API double MArray::norm (double p) const { - MARRAY_NORM_BODY (double, xdnrm2, XDNRM2); + 
MARRAY_NORM_BODY (double, double, xdnrm2, XDNRM2, octave_NaN); } template class OCTAVE_API MArray; diff -r 45f5faba05a2 -r 82be108cc558 liboctave/MArray-defs.h --- a/liboctave/MArray-defs.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/MArray-defs.h Sun Apr 27 22:34:17 2008 +0200 @@ -343,9 +343,9 @@ MDIAGARRAY2_DADA_BINOP_FWD_DEFS \ (R, T, dynamic_cast&>, R, dynamic_cast&>, R) -#define MARRAY_NORM_BODY(TYPE, blas_norm, BLAS_NORM) \ +#define MARRAY_NORM_BODY(TYPE, RTYPE, blas_norm, BLAS_NORM, NAN_VALUE) \ \ - double retval = octave_NaN; \ + RTYPE retval = NAN_VALUE; \ \ octave_idx_type len = length (); \ \ @@ -359,20 +359,20 @@ retval = 0; \ \ /* precondition */ \ - double inf_norm = 0.; \ + RTYPE inf_norm = 0.; \ for (octave_idx_type i = 0; i < len; i++) \ { \ - double d_abs = std::abs (d[i]); \ + RTYPE d_abs = std::abs (d[i]); \ if (d_abs > inf_norm) \ inf_norm = d_abs; \ } \ inf_norm = (inf_norm == octave_Inf || inf_norm == 0. ? 1.0 : \ inf_norm); \ - double scale = 1. / inf_norm; \ + RTYPE scale = 1. / inf_norm; \ \ for (octave_idx_type i = 0; i < len; i++) \ { \ - double d_abs = std::abs (d[i]) * scale; \ + RTYPE d_abs = std::abs (d[i]) * scale; \ retval += d_abs * d_abs; \ } \ \ @@ -394,7 +394,7 @@ { \ while (i < len) \ { \ - double d_abs = std::abs (d[i++]); \ + RTYPE d_abs = std::abs (d[i++]); \ \ if (d_abs > retval) \ retval = d_abs; \ @@ -404,7 +404,7 @@ { \ while (i < len) \ { \ - double d_abs = std::abs (d[i++]); \ + RTYPE d_abs = std::abs (d[i++]); \ \ if (d_abs < retval) \ retval = d_abs; \ @@ -417,7 +417,7 @@ \ for (octave_idx_type i = 0; i < len; i++) \ { \ - double d_abs = std::abs (d[i]); \ + RTYPE d_abs = std::abs (d[i]); \ retval += pow (d_abs, p); \ } \ \ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/MArray-f.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/MArray-f.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,77 @@ +/* + +Copyright (C) 1995, 1996, 1997, 2000, 2003, 2005, 2007 John W. Eaton + +This file is part of Octave. 
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +// Instantiate MArrays of float values. + +#include "f77-fcn.h" + +extern "C" +{ + F77_RET_T + F77_FUNC (xsnrm2, XSNRM2) (const octave_idx_type&, const float*, + const octave_idx_type&, float&); +} + +#include "MArray.h" +#include "MArray.cc" + +template <> +OCTAVE_API float +MArray::norm (float p) const +{ + MARRAY_NORM_BODY (float, float, xsnrm2, XSNRM2, octave_Float_NaN); +} + +template class OCTAVE_API MArray; + +INSTANTIATE_MARRAY_FRIENDS (float, OCTAVE_API) + +#include "MArray2.h" +#include "MArray2.cc" + +template class OCTAVE_API MArray2; + +INSTANTIATE_MARRAY2_FRIENDS (float, OCTAVE_API) + +#include "MArrayN.h" +#include "MArrayN.cc" + +template class OCTAVE_API MArrayN; + +INSTANTIATE_MARRAYN_FRIENDS (float, OCTAVE_API) + +#include "MDiagArray2.h" +#include "MDiagArray2.cc" + +template class OCTAVE_API MDiagArray2; + +INSTANTIATE_MDIAGARRAY2_FRIENDS (float, OCTAVE_API) + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/MArray-fC.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/MArray-fC.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,79 @@ +/* + +Copyright (C) 1995, 1996, 1997, 2000, 2003, 2005, 2006, 2007 + John W. Eaton + +This file is part of Octave. 
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +// Instantiate MArrays of FloatComplex values. + +#include "oct-cmplx.h" +#include "f77-fcn.h" + +extern "C" +{ + F77_RET_T + F77_FUNC (xscnrm2, XSCNRM2) (const octave_idx_type&, const FloatComplex*, + const octave_idx_type&, float&); +} + +#include "MArray.h" +#include "MArray.cc" + +template <> +OCTAVE_API float +MArray::norm (float p) const +{ + MARRAY_NORM_BODY (FloatComplex, float, xscnrm2, XSCNRM2, octave_Float_NaN); +} + +template class OCTAVE_API MArray; + +INSTANTIATE_MARRAY_FRIENDS (FloatComplex, OCTAVE_API) + +#include "MArray2.h" +#include "MArray2.cc" + +template class OCTAVE_API MArray2; + +INSTANTIATE_MARRAY2_FRIENDS (FloatComplex, OCTAVE_API) + +#include "MArrayN.h" +#include "MArrayN.cc" + +template class OCTAVE_API MArrayN; + +INSTANTIATE_MARRAYN_FRIENDS (FloatComplex, OCTAVE_API) + +#include "MDiagArray2.h" +#include "MDiagArray2.cc" + +template class OCTAVE_API MDiagArray2; + +INSTANTIATE_MDIAGARRAY2_FRIENDS (FloatComplex, OCTAVE_API) + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/MArray.cc --- a/liboctave/MArray.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/MArray.cc Sun Apr 27 22:34:17 2008 +0200 @@ -43,6 +43,16 @@ return 0; } +template +float +MArray::norm (float) const +{ + (*current_liboctave_error_handler) + ("norm: only 
implemented for double and complex values"); + + return 0; +} + // Element by element MArray by scalar ops. template diff -r 45f5faba05a2 -r 82be108cc558 liboctave/MArray.h --- a/liboctave/MArray.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/MArray.h Sun Apr 27 22:34:17 2008 +0200 @@ -63,6 +63,9 @@ return *this; } + MArray transpose (void) const { return Array::transpose (); } + MArray hermitian (T (*fcn) (const T&) = 0) const { return Array::hermitian (fcn); } + octave_idx_type nnz (void) const { octave_idx_type retval = 0; @@ -81,6 +84,7 @@ } double norm (double p) const; + float norm (float p) const; template MArray map (F fcn) const diff -r 45f5faba05a2 -r 82be108cc558 liboctave/MArray2.h --- a/liboctave/MArray2.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/MArray2.h Sun Apr 27 22:34:17 2008 +0200 @@ -80,6 +80,7 @@ } MArray2 transpose (void) const { return Array2::transpose (); } + MArray2 hermitian (T (*fcn) (const T&) = 0) const { return Array2::hermitian (fcn); } MArray2 diag (octave_idx_type k) const { diff -r 45f5faba05a2 -r 82be108cc558 liboctave/MDiagArray2.h --- a/liboctave/MDiagArray2.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/MDiagArray2.h Sun Apr 27 22:34:17 2008 +0200 @@ -81,6 +81,9 @@ return retval; } + MDiagArray2 transpose (void) const { return DiagArray2::transpose (); } + MDiagArray2 hermitian (T (*fcn) (const T&) = 0) const { return DiagArray2::hermitian (fcn); } + static MDiagArray2 nil_array; // Currently, the OPS functions don't need to be friends, but that diff -r 45f5faba05a2 -r 82be108cc558 liboctave/Makefile.in --- a/liboctave/Makefile.in Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/Makefile.in Sun Apr 27 22:34:17 2008 +0200 @@ -46,19 +46,25 @@ base-lu.h dim-vector.h mx-base.h mx-op-defs.h \ mx-defs.h mx-ext.h CColVector.h CDiagMatrix.h CMatrix.h \ CNDArray.h CRowVector.h CmplxAEPBAL.h CmplxCHOL.h \ - CmplxDET.h CmplxHESS.h CmplxLU.h CmplxQR.h CmplxQRP.h \ - CmplxSCHUR.h CmplxSVD.h EIG.h boolMatrix.h boolNDArray.h \ + 
CmplxDET.h CmplxGEPBAL.h CmplxHESS.h CmplxLU.h CmplxQR.h CmplxQRP.h \ + CmplxSCHUR.h CmplxSVD.h EIG.h fEIG.h boolMatrix.h boolNDArray.h \ chMatrix.h chNDArray.h dColVector.h dDiagMatrix.h dMatrix.h \ dNDArray.h dRowVector.h dbleAEPBAL.h dbleCHOL.h dbleDET.h \ - dbleHESS.h dbleLU.h dbleQR.h dbleQRP.h dbleSCHUR.h dbleSVD.h \ - boolSparse.h CSparse.h dSparse.h MSparse-defs.h MSparse.h \ + dbleGEPBAL.h dbleHESS.h dbleLU.h dbleQR.h dbleQRP.h dbleSCHUR.h \ + dbleSVD.h boolSparse.h CSparse.h dSparse.h MSparse-defs.h MSparse.h \ Sparse.h sparse-base-lu.h SparseCmplxLU.h SparsedbleLU.h \ sparse-base-chol.h SparseCmplxCHOL.h \ SparsedbleCHOL.h SparseCmplxQR.h SparseQR.h Sparse-op-defs.h \ MatrixType.h \ int8NDArray.h uint8NDArray.h int16NDArray.h uint16NDArray.h \ int32NDArray.h uint32NDArray.h int64NDArray.h uint64NDArray.h \ - intNDArray.h + intNDArray.h \ + fCColVector.h fCRowVector.h fCDiagMatrix.h fCMatrix.h fCNDArray.h \ + fColVector.h fRowVector.h fDiagMatrix.h fMatrix.h fNDArray.h \ + fCmplxGEPBAL.h fCmplxHESS.h fCmplxCHOL.h fCmplxDET.h fCmplxLU.h \ + fCmplxSCHUR.h fCmplxSVD.h fCmplxQR.h fCmplxQRP.h \ + floatCHOL.h floatDET.h floatGEPBAL.h floatHESS.h floatLU.h \ + floatSCHUR.h floatSVD.h floatQR.h floatQRP.h MX_OP_INC := $(shell $(AWK) -f $(srcdir)/mk-ops.awk prefix=mx list_h_files=1 $(srcdir)/mx-ops) @@ -102,25 +108,31 @@ sparse-dmsolve.cc TI_SRC := Array-C.cc Array-b.cc Array-ch.cc Array-i.cc Array-d.cc \ - Array-s.cc Array-so.cc Array-str.cc Array-idx-vec.cc \ - MArray-C.cc MArray-ch.cc MArray-i.cc MArray-d.cc MArray-s.cc \ - MSparse-C.cc MSparse-d.cc Sparse-C.cc Sparse-b.cc Sparse-d.cc \ - oct-inttypes.cc + Array-f.cc Array-fC.cc Array-s.cc Array-so.cc Array-str.cc \ + Array-idx-vec.cc MArray-C.cc MArray-ch.cc MArray-i.cc MArray-d.cc \ + MArray-f.cc MArray-fC.cc MArray-s.cc MSparse-C.cc MSparse-d.cc \ + Sparse-C.cc Sparse-b.cc Sparse-d.cc oct-inttypes.cc MATRIX_SRC := Array-util.cc CColVector.cc \ CDiagMatrix.cc CMatrix.cc CNDArray.cc CRowVector.cc \ - 
CmplxAEPBAL.cc CmplxCHOL.cc CmplxDET.cc CmplxHESS.cc \ + CmplxAEPBAL.cc CmplxCHOL.cc CmplxDET.cc CmplxGEPBAL.cc CmplxHESS.cc \ CmplxLU.cc CmplxQR.cc CmplxQRP.cc CmplxSCHUR.cc CmplxSVD.cc \ - EIG.cc boolMatrix.cc boolNDArray.cc chMatrix.cc \ + EIG.cc fEIG.cc boolMatrix.cc boolNDArray.cc chMatrix.cc \ chNDArray.cc dColVector.cc dDiagMatrix.cc dMatrix.cc \ dNDArray.cc dRowVector.cc dbleAEPBAL.cc dbleCHOL.cc \ - dbleDET.cc dbleHESS.cc dbleLU.cc dbleQR.cc dbleQRP.cc \ + dbleDET.cc dbleGEPBAL.cc dbleHESS.cc dbleLU.cc dbleQR.cc dbleQRP.cc \ dbleSCHUR.cc dbleSVD.cc boolSparse.cc CSparse.cc dSparse.cc \ MSparse.cc Sparse.cc SparseCmplxLU.cc SparsedbleLU.cc \ SparseCmplxCHOL.cc SparsedbleCHOL.cc \ SparseCmplxQR.cc SparseQR.cc MatrixType.cc \ int8NDArray.cc uint8NDArray.cc int16NDArray.cc uint16NDArray.cc \ - int32NDArray.cc uint32NDArray.cc int64NDArray.cc uint64NDArray.cc + int32NDArray.cc uint32NDArray.cc int64NDArray.cc uint64NDArray.cc \ + fCColVector.cc fCRowVector.cc fCDiagMatrix.cc fCMatrix.cc fCNDArray.cc \ + fColVector.cc fRowVector.cc fDiagMatrix.cc fMatrix.cc fNDArray.cc \ + fCmplxCHOL.cc fCmplxDET.cc fCmplxGEPBAL.cc fCmplxHESS.cc fCmplxLU.cc \ + fCmplxSCHUR.cc fCmplxSVD.cc fCmplxQR.cc fCmplxQRP.cc \ + floatCHOL.cc floatDET.cc floatGEPBAL.cc floatHESS.cc floatLU.cc \ + floatSCHUR.cc floatSVD.cc floatQR.cc floatQRP.cc MX_OP_SRC := $(shell $(AWK) -f $(srcdir)/mk-ops.awk prefix=mx list_cc_files=1 $(srcdir)/mx-ops) diff -r 45f5faba05a2 -r 82be108cc558 liboctave/MatrixType.cc --- a/liboctave/MatrixType.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/MatrixType.cc Sun Apr 27 22:34:17 2008 +0200 @@ -175,6 +175,127 @@ typ = MatrixType::Rectangular; } + +MatrixType::MatrixType (const FloatMatrix &a) + : typ (MatrixType::Unknown), + sp_bandden (0), bandden (0), upper_band (0), lower_band (0), + dense (false), full (true), nperm (0), perm (0) +{ + octave_idx_type nrows = a.rows (); + octave_idx_type ncols = a.cols (); + + if (ncols == nrows) + { + bool upper = true; + bool 
lower = true; + bool hermitian = true; + + for (octave_idx_type j = 0; j < ncols; j++) + { + if (j < nrows) + { + if (a.elem (j,j) == 0.) + { + upper = false; + lower = false; + hermitian = false; + break; + } + if (a.elem (j,j) < 0.) + hermitian = false; + } + for (octave_idx_type i = 0; i < j; i++) + if (lower && a.elem (i,j) != 0.) + { + lower = false; + break; + } + for (octave_idx_type i = j+1; i < nrows; i++) + { + if (hermitian && a.elem (i, j) != a.elem (j, i)) + hermitian = false; + if (upper && a.elem (i,j) != 0) + upper = false; + } + if (!upper && !lower && !hermitian) + break; + } + + if (upper) + typ = MatrixType::Upper; + else if (lower) + typ = MatrixType::Lower; + else if (hermitian) + typ = MatrixType::Hermitian; + else if (ncols == nrows) + typ = MatrixType::Full; + } + else + typ = MatrixType::Rectangular; +} + +MatrixType::MatrixType (const FloatComplexMatrix &a) + : typ (MatrixType::Unknown), + sp_bandden (0), bandden (0), upper_band (0), lower_band (0), + dense (false), full (true), nperm (0), perm (0) +{ + octave_idx_type nrows = a.rows (); + octave_idx_type ncols = a.cols (); + + if (ncols == nrows) + { + bool upper = true; + bool lower = true; + bool hermitian = true; + + for (octave_idx_type j = 0; j < ncols; j++) + { + if (j < ncols) + { + if (imag(a.elem (j,j)) == 0. && + real(a.elem (j,j)) == 0.) + { + upper = false; + lower = false; + hermitian = false; + break; + } + + if (imag(a.elem (j,j)) != 0. || + real(a.elem (j,j)) < 0.) 
+ hermitian = false; + } + for (octave_idx_type i = 0; i < j; i++) + if (lower && (real(a.elem (i,j)) != 0 || imag(a.elem (i,j)) != 0)) + { + lower = false; + break; + } + for (octave_idx_type i = j+1; i < nrows; i++) + { + if (hermitian && a.elem (i, j) != conj(a.elem (j, i))) + hermitian = false; + if (upper && (real(a.elem (i,j)) != 0 || + imag(a.elem (i,j)) != 0)) + upper = false; + } + if (!upper && !lower && !hermitian) + break; + } + + if (upper) + typ = MatrixType::Upper; + else if (lower) + typ = MatrixType::Lower; + else if (hermitian) + typ = MatrixType::Hermitian; + else if (ncols == nrows) + typ = MatrixType::Full; + } + else + typ = MatrixType::Rectangular; +} + MatrixType::MatrixType (const SparseMatrix &a) : typ (MatrixType::Unknown), sp_bandden (0), bandden (0), upper_band (0), lower_band (0), @@ -1000,6 +1121,7 @@ return typ; } + int MatrixType::type (const Matrix &a) { @@ -1054,6 +1176,60 @@ return typ; } +int +MatrixType::type (const FloatMatrix &a) +{ + if (typ != MatrixType::Unknown) + { + if (octave_sparse_params::get_key ("spumoni") != 0.) + (*current_liboctave_warning_handler) + ("Using Cached Matrix Type"); + + return typ; + } + + MatrixType tmp_typ (a); + typ = tmp_typ.typ; + full = tmp_typ.full; + nperm = tmp_typ.nperm; + + if (nperm != 0) + { + perm = new octave_idx_type [nperm]; + for (octave_idx_type i = 0; i < nperm; i++) + perm[i] = tmp_typ.perm[i]; + } + + return typ; +} + +int +MatrixType::type (const FloatComplexMatrix &a) +{ + if (typ != MatrixType::Unknown) + { + if (octave_sparse_params::get_key ("spumoni") != 0.) 
+ (*current_liboctave_warning_handler) + ("Using Cached Matrix Type"); + + return typ; + } + + MatrixType tmp_typ (a); + typ = tmp_typ.typ; + full = tmp_typ.full; + nperm = tmp_typ.nperm; + + if (nperm != 0) + { + perm = new octave_idx_type [nperm]; + for (octave_idx_type i = 0; i < nperm; i++) + perm[i] = tmp_typ.perm[i]; + } + + return typ; +} + void MatrixType::info () const { diff -r 45f5faba05a2 -r 82be108cc558 liboctave/MatrixType.h --- a/liboctave/MatrixType.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/MatrixType.h Sun Apr 27 22:34:17 2008 +0200 @@ -26,6 +26,8 @@ class Matrix; class ComplexMatrix; +class FloatMatrix; +class FloatComplexMatrix; class SparseMatrix; class SparseComplexMatrix; @@ -59,6 +61,10 @@ MatrixType (const ComplexMatrix &a); + MatrixType (const FloatMatrix &a); + + MatrixType (const FloatComplexMatrix &a); + MatrixType (const SparseMatrix &a); MatrixType (const SparseComplexMatrix &a); @@ -81,6 +87,10 @@ int type (const ComplexMatrix &a); + int type (const FloatMatrix &a); + + int type (const FloatComplexMatrix &a); + int type (const SparseMatrix &a); int type (const SparseComplexMatrix &a); diff -r 45f5faba05a2 -r 82be108cc558 liboctave/SparseCmplxQR.cc --- a/liboctave/SparseCmplxQR.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/SparseCmplxQR.cc Sun Apr 27 22:34:17 2008 +0200 @@ -880,6 +880,20 @@ #endif } +ComplexMatrix +qrsolve (const SparseComplexMatrix &a, const MArray2 &b, + octave_idx_type &info) +{ + return qrsolve (a, Matrix (b), info); +} + +ComplexMatrix +qrsolve (const SparseComplexMatrix &a, const MArray2 &b, + octave_idx_type &info) +{ + return qrsolve (a, ComplexMatrix (b), info); +} + /* ;;; Local Variables: *** ;;; mode: C++ *** diff -r 45f5faba05a2 -r 82be108cc558 liboctave/SparseCmplxQR.h --- a/liboctave/SparseCmplxQR.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/SparseCmplxQR.h Sun Apr 27 22:34:17 2008 +0200 @@ -149,6 +149,10 @@ extern ComplexMatrix qrsolve (const SparseComplexMatrix &a, const Matrix &b, 
octave_idx_type &info); +extern ComplexMatrix qrsolve (const SparseComplexMatrix &a, + const MArray2 &b, + octave_idx_type &info); + extern SparseComplexMatrix qrsolve (const SparseComplexMatrix &a, const SparseMatrix &b, octave_idx_type &info); @@ -157,6 +161,10 @@ const ComplexMatrix &b, octave_idx_type &info); +extern ComplexMatrix qrsolve (const SparseComplexMatrix &a, + const MArray2 &b, + octave_idx_type &info); + extern SparseComplexMatrix qrsolve (const SparseComplexMatrix &a, const SparseComplexMatrix &b, octave_idx_type &info); diff -r 45f5faba05a2 -r 82be108cc558 liboctave/SparseQR.cc --- a/liboctave/SparseQR.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/SparseQR.cc Sun Apr 27 22:34:17 2008 +0200 @@ -896,6 +896,21 @@ #endif } +Matrix +qrsolve(const SparseMatrix &a, const MArray2 &b, + octave_idx_type &info) +{ + return qrsolve (a, Matrix (b), info); +} + +ComplexMatrix +qrsolve(const SparseMatrix &a, const MArray2 &b, + octave_idx_type &info) +{ + return qrsolve (a, ComplexMatrix (b), info); +} + + /* ;;; Local Variables: *** ;;; mode: C++ *** diff -r 45f5faba05a2 -r 82be108cc558 liboctave/SparseQR.h --- a/liboctave/SparseQR.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/SparseQR.h Sun Apr 27 22:34:17 2008 +0200 @@ -145,12 +145,18 @@ extern Matrix qrsolve (const SparseMatrix &a, const Matrix &b, octave_idx_type &info); +extern Matrix qrsolve (const SparseMatrix &a, const MArray2 &b, + octave_idx_type &info); + extern SparseMatrix qrsolve (const SparseMatrix &a, const SparseMatrix &b, octave_idx_type &info); extern ComplexMatrix qrsolve (const SparseMatrix &a, const ComplexMatrix &b, octave_idx_type &info); +extern ComplexMatrix qrsolve (const SparseMatrix &a, const MArray2 &b, + octave_idx_type &info); + extern SparseComplexMatrix qrsolve (const SparseMatrix &a, const SparseComplexMatrix &b, octave_idx_type &info); diff -r 45f5faba05a2 -r 82be108cc558 liboctave/dColVector.cc --- a/liboctave/dColVector.cc Wed May 14 18:09:56 2008 +0200 +++ 
b/liboctave/dColVector.cc Sun Apr 27 22:34:17 2008 +0200 @@ -142,7 +142,7 @@ RowVector ColumnVector::transpose (void) const { - return RowVector (*this); + return MArray::transpose(); } ColumnVector diff -r 45f5faba05a2 -r 82be108cc558 liboctave/dDiagMatrix.cc --- a/liboctave/dDiagMatrix.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/dDiagMatrix.cc Sun Apr 27 22:34:17 2008 +0200 @@ -139,12 +139,6 @@ } DiagMatrix -DiagMatrix::transpose (void) const -{ - return DiagMatrix (mx_inline_dup (data (), length ()), cols (), rows ()); -} - -DiagMatrix real (const ComplexDiagMatrix& a) { DiagMatrix retval; diff -r 45f5faba05a2 -r 82be108cc558 liboctave/dDiagMatrix.h --- a/liboctave/dDiagMatrix.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/dDiagMatrix.h Sun Apr 27 22:34:17 2008 +0200 @@ -70,7 +70,7 @@ DiagMatrix& fill (const ColumnVector& a, octave_idx_type beg); DiagMatrix& fill (const RowVector& a, octave_idx_type beg); - DiagMatrix transpose (void) const; + DiagMatrix transpose (void) const { return MDiagArray2::transpose(); } friend OCTAVE_API DiagMatrix real (const ComplexDiagMatrix& a); friend OCTAVE_API DiagMatrix imag (const ComplexDiagMatrix& a); diff -r 45f5faba05a2 -r 82be108cc558 liboctave/dMatrix.cc --- a/liboctave/dMatrix.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/dMatrix.cc Sun Apr 27 22:34:17 2008 +0200 @@ -185,13 +185,13 @@ // each subroutine. 
F77_RET_T - F77_FUNC (cffti, CFFTI) (const octave_idx_type&, Complex*); + F77_FUNC (zffti, ZFFTI) (const octave_idx_type&, Complex*); F77_RET_T - F77_FUNC (cfftf, CFFTF) (const octave_idx_type&, Complex*, Complex*); + F77_FUNC (zfftf, ZFFTF) (const octave_idx_type&, Complex*, Complex*); F77_RET_T - F77_FUNC (cfftb, CFFTB) (const octave_idx_type&, Complex*, Complex*); + F77_FUNC (zfftb, ZFFTB) (const octave_idx_type&, Complex*, Complex*); F77_RET_T F77_FUNC (dlartg, DLARTG) (const double&, const double&, double&, @@ -1019,13 +1019,13 @@ retval = ComplexMatrix (*this); Complex *tmp_data = retval.fortran_vec (); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type j = 0; j < nsamples; j++) { OCTAVE_QUIT; - F77_FUNC (cfftf, CFFTF) (npts, &tmp_data[npts*j], pwsave); + F77_FUNC (zfftf, ZFFTF) (npts, &tmp_data[npts*j], pwsave); } return retval; @@ -1060,13 +1060,13 @@ retval = ComplexMatrix (*this); Complex *tmp_data = retval.fortran_vec (); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type j = 0; j < nsamples; j++) { OCTAVE_QUIT; - F77_FUNC (cfftb, CFFTB) (npts, &tmp_data[npts*j], pwsave); + F77_FUNC (zfftb, ZFFTB) (npts, &tmp_data[npts*j], pwsave); } for (octave_idx_type j = 0; j < npts*nsamples; j++) @@ -1104,13 +1104,13 @@ retval = ComplexMatrix (*this); Complex *tmp_data = retval.fortran_vec (); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type j = 0; j < nsamples; j++) { OCTAVE_QUIT; - F77_FUNC (cfftf, CFFTF) (npts, &tmp_data[npts*j], pwsave); + F77_FUNC (zfftf, ZFFTF) (npts, &tmp_data[npts*j], pwsave); } npts = nc; @@ -1123,7 +1123,7 @@ Array tmp (npts); Complex *prow = tmp.fortran_vec (); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type j = 0; j < nsamples; j++) { @@ -1132,7 +1132,7 @@ for (octave_idx_type i = 0; i < npts; i++) prow[i] = 
tmp_data[i*nr + j]; - F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); + F77_FUNC (zfftf, ZFFTF) (npts, prow, pwsave); for (octave_idx_type i = 0; i < npts; i++) tmp_data[i*nr + j] = prow[i]; @@ -1170,13 +1170,13 @@ retval = ComplexMatrix (*this); Complex *tmp_data = retval.fortran_vec (); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type j = 0; j < nsamples; j++) { OCTAVE_QUIT; - F77_FUNC (cfftb, CFFTB) (npts, &tmp_data[npts*j], pwsave); + F77_FUNC (zfftb, ZFFTB) (npts, &tmp_data[npts*j], pwsave); } for (octave_idx_type j = 0; j < npts*nsamples; j++) @@ -1192,7 +1192,7 @@ Array tmp (npts); Complex *prow = tmp.fortran_vec (); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type j = 0; j < nsamples; j++) { @@ -1201,7 +1201,7 @@ for (octave_idx_type i = 0; i < npts; i++) prow[i] = tmp_data[i*nr + j]; - F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); + F77_FUNC (zfftb, ZFFTB) (npts, prow, pwsave); for (octave_idx_type i = 0; i < npts; i++) tmp_data[i*nr + j] = prow[i] / static_cast (npts); diff -r 45f5faba05a2 -r 82be108cc558 liboctave/dMatrix.h --- a/liboctave/dMatrix.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/dMatrix.h Sun Apr 27 22:34:17 2008 +0200 @@ -51,7 +51,11 @@ Matrix (const Matrix& a) : MArray2 (a) { } - Matrix (const MArray2& a) : MArray2 (a) { } + template + Matrix (const MArray2& a) : MArray2 (a) { } + + template + Matrix (const Array2& a) : MArray2 (a) { } explicit Matrix (const RowVector& rv); diff -r 45f5faba05a2 -r 82be108cc558 liboctave/dNDArray.cc --- a/liboctave/dNDArray.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/dNDArray.cc Sun Apr 27 22:34:17 2008 +0200 @@ -182,13 +182,13 @@ // each subroutine. 
F77_RET_T - F77_FUNC (cffti, CFFTI) (const octave_idx_type&, Complex*); + F77_FUNC (zffti, ZFFTI) (const octave_idx_type&, Complex*); F77_RET_T - F77_FUNC (cfftf, CFFTF) (const octave_idx_type&, Complex*, Complex*); + F77_FUNC (zfftf, ZFFTF) (const octave_idx_type&, Complex*, Complex*); F77_RET_T - F77_FUNC (cfftb, CFFTB) (const octave_idx_type&, Complex*, Complex*); + F77_FUNC (zfftb, ZFFTB) (const octave_idx_type&, Complex*, Complex*); } ComplexNDArray @@ -217,7 +217,7 @@ octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); octave_idx_type dist = (stride == 1 ? npts : 1); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type k = 0; k < nloop; k++) { @@ -228,7 +228,7 @@ for (octave_idx_type i = 0; i < npts; i++) tmp[i] = elem((i + k*npts)*stride + j*dist); - F77_FUNC (cfftf, CFFTF) (npts, tmp, pwsave); + F77_FUNC (zfftf, ZFFTF) (npts, tmp, pwsave); for (octave_idx_type i = 0; i < npts; i++) retval ((i + k*npts)*stride + j*dist) = tmp[i]; @@ -264,7 +264,7 @@ octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); octave_idx_type dist = (stride == 1 ? npts : 1); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type k = 0; k < nloop; k++) { @@ -275,7 +275,7 @@ for (octave_idx_type i = 0; i < npts; i++) tmp[i] = elem((i + k*npts)*stride + j*dist); - F77_FUNC (cfftb, CFFTB) (npts, tmp, pwsave); + F77_FUNC (zfftb, ZFFTB) (npts, tmp, pwsave); for (octave_idx_type i = 0; i < npts; i++) retval ((i + k*npts)*stride + j*dist) = tmp[i] / @@ -310,7 +310,7 @@ octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); octave_idx_type dist = (stride == 1 ? 
npts : 1); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type k = 0; k < nloop; k++) { @@ -321,7 +321,7 @@ for (octave_idx_type l = 0; l < npts; l++) prow[l] = retval ((l + k*npts)*stride + j*dist); - F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); + F77_FUNC (zfftf, ZFFTF) (npts, prow, pwsave); for (octave_idx_type l = 0; l < npts; l++) retval ((l + k*npts)*stride + j*dist) = prow[l]; @@ -358,7 +358,7 @@ octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); octave_idx_type dist = (stride == 1 ? npts : 1); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type k = 0; k < nloop; k++) { @@ -369,7 +369,7 @@ for (octave_idx_type l = 0; l < npts; l++) prow[l] = retval ((l + k*npts)*stride + j*dist); - F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); + F77_FUNC (zfftb, ZFFTB) (npts, prow, pwsave); for (octave_idx_type l = 0; l < npts; l++) retval ((l + k*npts)*stride + j*dist) = prow[l] / @@ -406,7 +406,7 @@ octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); octave_idx_type dist = (stride == 1 ? npts : 1); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type k = 0; k < nloop; k++) { @@ -417,7 +417,7 @@ for (octave_idx_type l = 0; l < npts; l++) prow[l] = retval ((l + k*npts)*stride + j*dist); - F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); + F77_FUNC (zfftf, ZFFTF) (npts, prow, pwsave); for (octave_idx_type l = 0; l < npts; l++) retval ((l + k*npts)*stride + j*dist) = prow[l]; @@ -453,7 +453,7 @@ octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); octave_idx_type dist = (stride == 1 ? 
npts : 1); - F77_FUNC (cffti, CFFTI) (npts, pwsave); + F77_FUNC (zffti, ZFFTI) (npts, pwsave); for (octave_idx_type k = 0; k < nloop; k++) { @@ -464,7 +464,7 @@ for (octave_idx_type l = 0; l < npts; l++) prow[l] = retval ((l + k*npts)*stride + j*dist); - F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); + F77_FUNC (zfftb, ZFFTB) (npts, prow, pwsave); for (octave_idx_type l = 0; l < npts; l++) retval ((l + k*npts)*stride + j*dist) = prow[l] / diff -r 45f5faba05a2 -r 82be108cc558 liboctave/dNDArray.h --- a/liboctave/dNDArray.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/dNDArray.h Sun Apr 27 22:34:17 2008 +0200 @@ -47,7 +47,11 @@ NDArray (const Matrix& a) : MArrayN (a) { } - NDArray (const MArrayN& a) : MArrayN (a) { } + template + NDArray (const MArrayN& a) : MArrayN (a) { } + + template + NDArray (const ArrayN& a) : MArrayN (a) { } template explicit NDArray (const intNDArray& a) : MArrayN (a) { } diff -r 45f5faba05a2 -r 82be108cc558 liboctave/dRowVector.cc --- a/liboctave/dRowVector.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/dRowVector.cc Sun Apr 27 22:34:17 2008 +0200 @@ -144,7 +144,7 @@ ColumnVector RowVector::transpose (void) const { - return ColumnVector (*this); + return MArray::transpose(); } RowVector diff -r 45f5faba05a2 -r 82be108cc558 liboctave/data-conv.cc --- a/liboctave/data-conv.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/data-conv.cc Sun Apr 27 22:34:17 2008 +0200 @@ -1059,6 +1059,57 @@ } void +read_floats (std::istream& is, float *data, save_type type, int len, + bool swap, oct_mach_info::float_format fmt) +{ + switch (type) + { + case LS_U_CHAR: + LS_DO_READ (uint8_t, swap, data, 1, len, is); + break; + + case LS_U_SHORT: + LS_DO_READ (uint16_t, swap, data, 2, len, is); + break; + + case LS_U_INT: + LS_DO_READ (uint32_t, swap, data, 4, len, is); + break; + + case LS_CHAR: + LS_DO_READ (int8_t, swap, data, 1, len, is); + break; + + case LS_SHORT: + LS_DO_READ (int16_t, swap, data, 2, len, is); + break; + + case LS_INT: + LS_DO_READ 
(int32_t, swap, data, 4, len, is); + break; + + case LS_FLOAT: // No conversion necessary. + is.read (reinterpret_cast (data), 4 * len); + do_float_format_conversion (data, len, fmt); + break; + + case LS_DOUBLE: + { + OCTAVE_LOCAL_BUFFER (double, ptr, len); + is.read (reinterpret_cast (ptr), 8 * len); + do_double_format_conversion (ptr, len, fmt); + for (int i = 0; i < len; i++) + data[i] = ptr[i]; + } + break; + + default: + is.clear (std::ios::failbit|is.rdstate ()); + break; + } +} + +void write_doubles (std::ostream& os, const double *data, save_type type, int len) { switch (type) @@ -1106,6 +1157,54 @@ } } +void +write_floats (std::ostream& os, const float *data, save_type type, int len) +{ + switch (type) + { + case LS_U_CHAR: + LS_DO_WRITE (uint8_t, data, 1, len, os); + break; + + case LS_U_SHORT: + LS_DO_WRITE (uint16_t, data, 2, len, os); + break; + + case LS_U_INT: + LS_DO_WRITE (uint32_t, data, 4, len, os); + break; + + case LS_CHAR: + LS_DO_WRITE (int8_t, data, 1, len, os); + break; + + case LS_SHORT: + LS_DO_WRITE (int16_t, data, 2, len, os); + break; + + case LS_INT: + LS_DO_WRITE (int32_t, data, 4, len, os); + break; + + case LS_FLOAT: // No conversion necessary. 
+ { + char tmp_type = static_cast (type); + os.write (&tmp_type, 1); + os.write (reinterpret_cast (data), 4 * len); + } + break; + + case LS_DOUBLE: + LS_DO_WRITE (double, data, 8, len, os); + break; + + default: + (*current_liboctave_error_handler) + ("unrecognized data format requested"); + break; + } +} + /* ;;; Local Variables: *** ;;; mode: C++ *** diff -r 45f5faba05a2 -r 82be108cc558 liboctave/data-conv.h --- a/liboctave/data-conv.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/data-conv.h Sun Apr 27 22:34:17 2008 +0200 @@ -115,6 +115,12 @@ extern OCTAVE_API void write_doubles (std::ostream& os, const double *data, save_type type, int len); +extern OCTAVE_API void +read_floats (std::istream& is, float *data, save_type type, int len, + bool swap, oct_mach_info::float_format fmt); +extern OCTAVE_API void +write_floats (std::ostream& os, const float *data, save_type type, int len); + #endif /* diff -r 45f5faba05a2 -r 82be108cc558 liboctave/dbleDET.cc --- a/liboctave/dbleDET.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/dbleDET.cc Sun Apr 27 22:34:17 2008 +0200 @@ -52,7 +52,7 @@ { if (c2 != 0.0) { - double etmp = e2 / xlog2 (10); + double etmp = e2 / xlog2 (static_cast(10)); e10 = static_cast (xround (etmp)); etmp -= e10; c10 = c2 * pow (10.0, etmp); @@ -74,7 +74,7 @@ double DET::value (void) const { - return base2 ? c2 * xexp2 (e2) : c10 * pow (10.0, e10); + return base2 ? 
c2 * xexp2 (static_cast(e2)) : c10 * pow (10.0, e10); } /* diff -r 45f5faba05a2 -r 82be108cc558 liboctave/dbleSVD.cc --- a/liboctave/dbleSVD.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/dbleSVD.cc Sun Apr 27 22:34:17 2008 +0200 @@ -49,7 +49,7 @@ if (type_computed == SVD::sigma_only) { (*current_liboctave_error_handler) - ("ComplexSVD: U not computed because type == SVD::sigma_only"); + ("SVD: U not computed because type == SVD::sigma_only"); return Matrix (); } else @@ -62,7 +62,7 @@ if (type_computed == SVD::sigma_only) { (*current_liboctave_error_handler) - ("ComplexSVD: V not computed because type == SVD::sigma_only"); + ("SVD: V not computed because type == SVD::sigma_only"); return Matrix (); } else diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCColVector.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCColVector.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,548 @@ +// ColumnVector manipulations. +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2004, + 2005, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "Array-util.h" +#include "f77-fcn.h" +#include "functor.h" +#include "lo-error.h" +#include "mx-base.h" +#include "mx-inlines.cc" +#include "oct-cmplx.h" + +// Fortran functions we call. 
+ +extern "C" +{ + F77_RET_T + F77_FUNC (cgemv, CGEMV) (F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const FloatComplex&, + const FloatComplex*, const octave_idx_type&, const FloatComplex*, + const octave_idx_type&, const FloatComplex&, FloatComplex*, const octave_idx_type& + F77_CHAR_ARG_LEN_DECL); +} + +// FloatComplex Column Vector class + +FloatComplexColumnVector::FloatComplexColumnVector (const FloatColumnVector& a) + : MArray (a.length ()) +{ + for (octave_idx_type i = 0; i < length (); i++) + elem (i) = a.elem (i); +} + +bool +FloatComplexColumnVector::operator == (const FloatComplexColumnVector& a) const +{ + octave_idx_type len = length (); + if (len != a.length ()) + return 0; + return mx_inline_equal (data (), a.data (), len); +} + +bool +FloatComplexColumnVector::operator != (const FloatComplexColumnVector& a) const +{ + return !(*this == a); +} + +// destructive insert/delete/reorder operations + +FloatComplexColumnVector& +FloatComplexColumnVector::insert (const FloatColumnVector& a, octave_idx_type r) +{ + octave_idx_type a_len = a.length (); + + if (r < 0 || r + a_len > length ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (r+i) = a.elem (i); + } + + return *this; +} + +FloatComplexColumnVector& +FloatComplexColumnVector::insert (const FloatComplexColumnVector& a, octave_idx_type r) +{ + octave_idx_type a_len = a.length (); + + if (r < 0 || r + a_len > length ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (r+i) = a.elem (i); + } + + return *this; +} + +FloatComplexColumnVector& +FloatComplexColumnVector::fill (float val) +{ + octave_idx_type len = length (); + + if (len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < len; 
i++) + xelem (i) = val; + } + + return *this; +} + +FloatComplexColumnVector& +FloatComplexColumnVector::fill (const FloatComplex& val) +{ + octave_idx_type len = length (); + + if (len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < len; i++) + xelem (i) = val; + } + + + return *this; +} + +FloatComplexColumnVector& +FloatComplexColumnVector::fill (float val, octave_idx_type r1, octave_idx_type r2) +{ + octave_idx_type len = length (); + + if (r1 < 0 || r2 < 0 || r1 >= len || r2 >= len) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + if (r1 > r2) { octave_idx_type tmp = r1; r1 = r2; r2 = tmp; } + + if (r2 >= r1) + { + make_unique (); + + for (octave_idx_type i = r1; i <= r2; i++) + xelem (i) = val; + } + + return *this; +} + +FloatComplexColumnVector& +FloatComplexColumnVector::fill (const FloatComplex& val, octave_idx_type r1, octave_idx_type r2) +{ + octave_idx_type len = length (); + + if (r1 < 0 || r2 < 0 || r1 >= len || r2 >= len) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + if (r1 > r2) { octave_idx_type tmp = r1; r1 = r2; r2 = tmp; } + + if (r2 >= r1) + { + make_unique (); + + for (octave_idx_type i = r1; i <= r2; i++) + xelem (i) = val; + } + + return *this; +} + +FloatComplexColumnVector +FloatComplexColumnVector::stack (const FloatColumnVector& a) const +{ + octave_idx_type len = length (); + octave_idx_type nr_insert = len; + FloatComplexColumnVector retval (len + a.length ()); + retval.insert (*this, 0); + retval.insert (a, nr_insert); + return retval; +} + +FloatComplexColumnVector +FloatComplexColumnVector::stack (const FloatComplexColumnVector& a) const +{ + octave_idx_type len = length (); + octave_idx_type nr_insert = len; + FloatComplexColumnVector retval (len + a.length ()); + retval.insert (*this, 0); + retval.insert (a, nr_insert); + return retval; +} + +FloatComplexRowVector +FloatComplexColumnVector::hermitian (void) const +{ + return 
MArray::hermitian (std::conj); +} + +FloatComplexRowVector +FloatComplexColumnVector::transpose (void) const +{ + return MArray::transpose (); +} + +FloatComplexColumnVector +conj (const FloatComplexColumnVector& a) +{ + octave_idx_type a_len = a.length (); + FloatComplexColumnVector retval; + if (a_len > 0) + retval = FloatComplexColumnVector (mx_inline_conj_dup (a.data (), a_len), a_len); + return retval; +} + +// resize is the destructive equivalent for this one + +FloatComplexColumnVector +FloatComplexColumnVector::extract (octave_idx_type r1, octave_idx_type r2) const +{ + if (r1 > r2) { octave_idx_type tmp = r1; r1 = r2; r2 = tmp; } + + octave_idx_type new_r = r2 - r1 + 1; + + FloatComplexColumnVector result (new_r); + + for (octave_idx_type i = 0; i < new_r; i++) + result.elem (i) = elem (r1+i); + + return result; +} + +FloatComplexColumnVector +FloatComplexColumnVector::extract_n (octave_idx_type r1, octave_idx_type n) const +{ + FloatComplexColumnVector result (n); + + for (octave_idx_type i = 0; i < n; i++) + result.elem (i) = elem (r1+i); + + return result; +} + +// column vector by column vector -> column vector operations + +FloatComplexColumnVector& +FloatComplexColumnVector::operator += (const FloatColumnVector& a) +{ + octave_idx_type len = length (); + + octave_idx_type a_len = a.length (); + + if (len != a_len) + { + gripe_nonconformant ("operator +=", len, a_len); + return *this; + } + + if (len == 0) + return *this; + + FloatComplex *d = fortran_vec (); // Ensures only one reference to my privates! + + mx_inline_add2 (d, a.data (), len); + return *this; +} + +FloatComplexColumnVector& +FloatComplexColumnVector::operator -= (const FloatColumnVector& a) +{ + octave_idx_type len = length (); + + octave_idx_type a_len = a.length (); + + if (len != a_len) + { + gripe_nonconformant ("operator -=", len, a_len); + return *this; + } + + if (len == 0) + return *this; + + FloatComplex *d = fortran_vec (); // Ensures only one reference to my privates! 
+ + mx_inline_subtract2 (d, a.data (), len); + return *this; +} + +// matrix by column vector -> column vector operations + +FloatComplexColumnVector +operator * (const FloatComplexMatrix& m, const FloatColumnVector& a) +{ + FloatComplexColumnVector tmp (a); + return m * tmp; +} + +FloatComplexColumnVector +operator * (const FloatComplexMatrix& m, const FloatComplexColumnVector& a) +{ + FloatComplexColumnVector retval; + + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.cols (); + + octave_idx_type a_len = a.length (); + + if (nc != a_len) + gripe_nonconformant ("operator *", nr, nc, a_len, 1); + else + { + if (nc == 0 || nr == 0) + retval.resize (nr, 0.0); + else + { + octave_idx_type ld = nr; + + retval.resize (nr); + FloatComplex *y = retval.fortran_vec (); + + F77_XFCN (cgemv, CGEMV, (F77_CONST_CHAR_ARG2 ("N", 1), + nr, nc, 1.0, m.data (), ld, + a.data (), 1, 0.0, y, 1 + F77_CHAR_ARG_LEN (1))); + } + } + + return retval; +} + +// matrix by column vector -> column vector operations + +FloatComplexColumnVector +operator * (const FloatMatrix& m, const FloatComplexColumnVector& a) +{ + FloatComplexMatrix tmp (m); + return tmp * a; +} + +// diagonal matrix by column vector -> column vector operations + +FloatComplexColumnVector +operator * (const FloatDiagMatrix& m, const FloatComplexColumnVector& a) +{ + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.cols (); + + octave_idx_type a_len = a.length (); + + if (nc != a_len) + { + gripe_nonconformant ("operator *", nr, nc, a_len, 1); + return FloatComplexColumnVector (); + } + + if (nc == 0 || nr == 0) + return FloatComplexColumnVector (0); + + FloatComplexColumnVector result (nr); + + for (octave_idx_type i = 0; i < a_len; i++) + result.elem (i) = a.elem (i) * m.elem (i, i); + + for (octave_idx_type i = a_len; i < nr; i++) + result.elem (i) = 0.0; + + return result; +} + +FloatComplexColumnVector +operator * (const FloatComplexDiagMatrix& m, const FloatColumnVector& a) +{ + octave_idx_type nr = 
m.rows (); + octave_idx_type nc = m.cols (); + + octave_idx_type a_len = a.length (); + + if (nc != a_len) + { + gripe_nonconformant ("operator *", nr, nc, a_len, 1); + return FloatComplexColumnVector (); + } + + if (nc == 0 || nr == 0) + return FloatComplexColumnVector (0); + + FloatComplexColumnVector result (nr); + + for (octave_idx_type i = 0; i < a_len; i++) + result.elem (i) = a.elem (i) * m.elem (i, i); + + for (octave_idx_type i = a_len; i < nr; i++) + result.elem (i) = 0.0; + + return result; +} + +FloatComplexColumnVector +operator * (const FloatComplexDiagMatrix& m, const FloatComplexColumnVector& a) +{ + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.cols (); + + octave_idx_type a_len = a.length (); + + if (nc != a_len) + { + gripe_nonconformant ("operator *", nr, nc, a_len, 1); + return FloatComplexColumnVector (); + } + + if (nc == 0 || nr == 0) + return FloatComplexColumnVector (0); + + FloatComplexColumnVector result (nr); + + for (octave_idx_type i = 0; i < a_len; i++) + result.elem (i) = a.elem (i) * m.elem (i, i); + + for (octave_idx_type i = a_len; i < nr; i++) + result.elem (i) = 0.0; + + return result; +} + +// other operations + +FloatColumnVector +FloatComplexColumnVector::map (dmapper fcn) const +{ + return MArray::map (func_ptr (fcn)); +} + +FloatComplexColumnVector +FloatComplexColumnVector::map (cmapper fcn) const +{ + return MArray::map (func_ptr (fcn)); +} + +FloatComplex +FloatComplexColumnVector::min (void) const +{ + octave_idx_type len = length (); + if (len == 0) + return 0.0; + + FloatComplex res = elem (0); + float absres = std::abs (res); + + for (octave_idx_type i = 1; i < len; i++) + if (std::abs (elem (i)) < absres) + { + res = elem (i); + absres = std::abs (res); + } + + return res; +} + +FloatComplex +FloatComplexColumnVector::max (void) const +{ + octave_idx_type len = length (); + if (len == 0) + return 0.0; + + FloatComplex res = elem (0); + float absres = std::abs (res); + + for (octave_idx_type i = 1; i < 
len; i++) + if (std::abs (elem (i)) > absres) + { + res = elem (i); + absres = std::abs (res); + } + + return res; +} + +// i/o + +std::ostream& +operator << (std::ostream& os, const FloatComplexColumnVector& a) +{ +// int field_width = os.precision () + 7; + for (octave_idx_type i = 0; i < a.length (); i++) + os << /* setw (field_width) << */ a.elem (i) << "\n"; + return os; +} + +std::istream& +operator >> (std::istream& is, FloatComplexColumnVector& a) +{ + octave_idx_type len = a.length(); + + if (len < 1) + is.clear (std::ios::badbit); + else + { + float tmp; + for (octave_idx_type i = 0; i < len; i++) + { + is >> tmp; + if (is) + a.elem (i) = tmp; + else + break; + } + } + return is; +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCColVector.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCColVector.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,144 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2003, 2004, 2005, + 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#if !defined (octave_FloatComplexColumnVector_h) +#define octave_FloatComplexColumnVector_h 1 + +#include "MArray.h" + +#include "mx-defs.h" + +class +OCTAVE_API +FloatComplexColumnVector : public MArray +{ +friend class FloatComplexMatrix; +friend class FloatComplexRowVector; + +public: + + FloatComplexColumnVector (void) : MArray () { } + + explicit FloatComplexColumnVector (octave_idx_type n) : MArray (n) { } + + FloatComplexColumnVector (octave_idx_type n, const FloatComplex& val) + : MArray (n, val) { } + + FloatComplexColumnVector (const FloatComplexColumnVector& a) : MArray (a) { } + + FloatComplexColumnVector (const MArray& a) : MArray (a) { } + + explicit FloatComplexColumnVector (const FloatColumnVector& a); + + FloatComplexColumnVector& operator = (const FloatComplexColumnVector& a) + { + MArray::operator = (a); + return *this; + } + + bool operator == (const FloatComplexColumnVector& a) const; + bool operator != (const FloatComplexColumnVector& a) const; + + // destructive insert/delete/reorder operations + + FloatComplexColumnVector& insert (const FloatColumnVector& a, octave_idx_type r); + FloatComplexColumnVector& insert (const FloatComplexColumnVector& a, octave_idx_type r); + + FloatComplexColumnVector& fill (float val); + FloatComplexColumnVector& fill (const FloatComplex& val); + FloatComplexColumnVector& fill (float val, octave_idx_type r1, octave_idx_type r2); + FloatComplexColumnVector& fill (const FloatComplex& val, octave_idx_type r1, octave_idx_type r2); + + FloatComplexColumnVector stack (const FloatColumnVector& a) const; + FloatComplexColumnVector stack (const FloatComplexColumnVector& a) const; + + FloatComplexRowVector hermitian (void) const; + FloatComplexRowVector transpose (void) const; + + friend FloatComplexColumnVector conj (const FloatComplexColumnVector& a); + + // resize is the destructive equivalent for this one + + FloatComplexColumnVector extract (octave_idx_type r1, octave_idx_type r2) const; + + 
FloatComplexColumnVector extract_n (octave_idx_type r1, octave_idx_type n) const; + + // column vector by column vector -> column vector operations + + FloatComplexColumnVector& operator += (const FloatColumnVector& a); + FloatComplexColumnVector& operator -= (const FloatColumnVector& a); + + // matrix by column vector -> column vector operations + + friend FloatComplexColumnVector operator * (const FloatComplexMatrix& a, + const FloatColumnVector& b); + + friend FloatComplexColumnVector operator * (const FloatComplexMatrix& a, + const FloatComplexColumnVector& b); + + // matrix by column vector -> column vector operations + + friend FloatComplexColumnVector operator * (const FloatMatrix& a, + const FloatComplexColumnVector& b); + + // diagonal matrix by column vector -> column vector operations + + friend FloatComplexColumnVector operator * (const FloatDiagMatrix& a, + const FloatComplexColumnVector& b); + + friend FloatComplexColumnVector operator * (const FloatComplexDiagMatrix& a, + const ColumnVector& b); + + friend FloatComplexColumnVector operator * (const FloatComplexDiagMatrix& a, + const FloatComplexColumnVector& b); + + // other operations + + typedef float (*dmapper) (const FloatComplex&); + typedef FloatComplex (*cmapper) (const FloatComplex&); + + FloatColumnVector map (dmapper fcn) const; + FloatComplexColumnVector map (cmapper fcn) const; + + FloatComplex min (void) const; + FloatComplex max (void) const; + + // i/o + + friend std::ostream& operator << (std::ostream& os, const FloatComplexColumnVector& a); + friend std::istream& operator >> (std::istream& is, FloatComplexColumnVector& a); + +private: + + FloatComplexColumnVector (FloatComplex *d, octave_idx_type l) : MArray (d, l) { } +}; + +MARRAY_FORWARD_DEFS (MArray, FloatComplexColumnVector, FloatComplex) + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCDiagMatrix.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/liboctave/fCDiagMatrix.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,555 @@ +// DiagMatrix manipulations. +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2004, + 2005, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "Array-util.h" +#include "lo-error.h" +#include "mx-base.h" +#include "mx-inlines.cc" +#include "oct-cmplx.h" + +// FloatComplex Diagonal Matrix class + +FloatComplexDiagMatrix::FloatComplexDiagMatrix (const FloatDiagMatrix& a) + : MDiagArray2 (a.rows (), a.cols ()) +{ + for (octave_idx_type i = 0; i < length (); i++) + elem (i, i) = a.elem (i, i); +} + +bool +FloatComplexDiagMatrix::operator == (const FloatComplexDiagMatrix& a) const +{ + if (rows () != a.rows () || cols () != a.cols ()) + return 0; + + return mx_inline_equal (data (), a.data (), length ()); +} + +bool +FloatComplexDiagMatrix::operator != (const FloatComplexDiagMatrix& a) const +{ + return !(*this == a); +} + +FloatComplexDiagMatrix& +FloatComplexDiagMatrix::fill (float val) +{ + for (octave_idx_type i = 0; i < length (); i++) + elem (i, i) = val; + return *this; +} + +FloatComplexDiagMatrix& +FloatComplexDiagMatrix::fill (const FloatComplex& val) +{ + for (octave_idx_type i = 0; i < length (); i++) + elem (i, i) = val; + return *this; +} + +FloatComplexDiagMatrix& +FloatComplexDiagMatrix::fill (float 
val, octave_idx_type beg, octave_idx_type end) +{ + if (beg < 0 || end >= length () || end < beg) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = beg; i <= end; i++) + elem (i, i) = val; + + return *this; +} + +FloatComplexDiagMatrix& +FloatComplexDiagMatrix::fill (const FloatComplex& val, octave_idx_type beg, octave_idx_type end) +{ + if (beg < 0 || end >= length () || end < beg) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = beg; i <= end; i++) + elem (i, i) = val; + + return *this; +} + +FloatComplexDiagMatrix& +FloatComplexDiagMatrix::fill (const FloatColumnVector& a) +{ + octave_idx_type len = length (); + if (a.length () != len) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = 0; i < len; i++) + elem (i, i) = a.elem (i); + + return *this; +} + +FloatComplexDiagMatrix& +FloatComplexDiagMatrix::fill (const FloatComplexColumnVector& a) +{ + octave_idx_type len = length (); + if (a.length () != len) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = 0; i < len; i++) + elem (i, i) = a.elem (i); + + return *this; +} + +FloatComplexDiagMatrix& +FloatComplexDiagMatrix::fill (const FloatRowVector& a) +{ + octave_idx_type len = length (); + if (a.length () != len) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = 0; i < len; i++) + elem (i, i) = a.elem (i); + + return *this; +} + +FloatComplexDiagMatrix& +FloatComplexDiagMatrix::fill (const FloatComplexRowVector& a) +{ + octave_idx_type len = length (); + if (a.length () != len) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = 0; i < len; i++) + elem (i, i) = a.elem (i); + + return *this; +} + +FloatComplexDiagMatrix& 
+FloatComplexDiagMatrix::fill (const FloatColumnVector& a, octave_idx_type beg) +{ + octave_idx_type a_len = a.length (); + if (beg < 0 || beg + a_len >= length ()) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = 0; i < a_len; i++) + elem (i+beg, i+beg) = a.elem (i); + + return *this; +} + +FloatComplexDiagMatrix& +FloatComplexDiagMatrix::fill (const FloatComplexColumnVector& a, octave_idx_type beg) +{ + octave_idx_type a_len = a.length (); + if (beg < 0 || beg + a_len >= length ()) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = 0; i < a_len; i++) + elem (i+beg, i+beg) = a.elem (i); + + return *this; +} + +FloatComplexDiagMatrix& +FloatComplexDiagMatrix::fill (const FloatRowVector& a, octave_idx_type beg) +{ + octave_idx_type a_len = a.length (); + if (beg < 0 || beg + a_len >= length ()) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = 0; i < a_len; i++) + elem (i+beg, i+beg) = a.elem (i); + + return *this; +} + +FloatComplexDiagMatrix& +FloatComplexDiagMatrix::fill (const FloatComplexRowVector& a, octave_idx_type beg) +{ + octave_idx_type a_len = a.length (); + if (beg < 0 || beg + a_len >= length ()) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = 0; i < a_len; i++) + elem (i+beg, i+beg) = a.elem (i); + + return *this; +} + +FloatComplexDiagMatrix +conj (const FloatComplexDiagMatrix& a) +{ + FloatComplexDiagMatrix retval; + octave_idx_type a_len = a.length (); + if (a_len > 0) + retval = FloatComplexDiagMatrix (mx_inline_conj_dup (a.data (), a_len), + a.rows (), a.cols ()); + return retval; +} + +// resize is the destructive analog for this one + +FloatComplexMatrix +FloatComplexDiagMatrix::extract (octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) const +{ + if (r1 
> r2) { octave_idx_type tmp = r1; r1 = r2; r2 = tmp; } + if (c1 > c2) { octave_idx_type tmp = c1; c1 = c2; c2 = tmp; } + + octave_idx_type new_r = r2 - r1 + 1; + octave_idx_type new_c = c2 - c1 + 1; + + FloatComplexMatrix result (new_r, new_c); + + for (octave_idx_type j = 0; j < new_c; j++) + for (octave_idx_type i = 0; i < new_r; i++) + result.elem (i, j) = elem (r1+i, c1+j); + + return result; +} + +// extract row or column i. + +FloatComplexRowVector +FloatComplexDiagMatrix::row (octave_idx_type i) const +{ + octave_idx_type r = rows (); + octave_idx_type c = cols (); + if (i < 0 || i >= r) + { + (*current_liboctave_error_handler) ("invalid row selection"); + return FloatComplexRowVector (); + } + + FloatComplexRowVector retval (c, 0.0); + if (r <= c || (r > c && i < c)) + retval.elem (i) = elem (i, i); + + return retval; +} + +FloatComplexRowVector +FloatComplexDiagMatrix::row (char *s) const +{ + if (! s) + { + (*current_liboctave_error_handler) ("invalid row selection"); + return FloatComplexRowVector (); + } + + char c = *s; + if (c == 'f' || c == 'F') + return row (static_cast(0)); + else if (c == 'l' || c == 'L') + return row (rows () - 1); + else + { + (*current_liboctave_error_handler) ("invalid row selection"); + return FloatComplexRowVector (); + } +} + +FloatComplexColumnVector +FloatComplexDiagMatrix::column (octave_idx_type i) const +{ + octave_idx_type r = rows (); + octave_idx_type c = cols (); + if (i < 0 || i >= c) + { + (*current_liboctave_error_handler) ("invalid column selection"); + return FloatComplexColumnVector (); + } + + FloatComplexColumnVector retval (r, 0.0); + if (r >= c || (r < c && i < r)) + retval.elem (i) = elem (i, i); + + return retval; +} + +FloatComplexColumnVector +FloatComplexDiagMatrix::column (char *s) const +{ + if (! 
s) + { + (*current_liboctave_error_handler) ("invalid column selection"); + return FloatComplexColumnVector (); + } + + char c = *s; + if (c == 'f' || c == 'F') + return column (static_cast(0)); + else if (c == 'l' || c == 'L') + return column (cols () - 1); + else + { + (*current_liboctave_error_handler) ("invalid column selection"); + return FloatComplexColumnVector (); + } +} + +FloatComplexDiagMatrix +FloatComplexDiagMatrix::inverse (void) const +{ + int info; + return inverse (info); +} + +FloatComplexDiagMatrix +FloatComplexDiagMatrix::inverse (int& info) const +{ + octave_idx_type r = rows (); + octave_idx_type c = cols (); + if (r != c) + { + (*current_liboctave_error_handler) ("inverse requires square matrix"); + return FloatComplexDiagMatrix (); + } + + FloatComplexDiagMatrix retval (r, c); + + info = 0; + for (octave_idx_type i = 0; i < length (); i++) + { + if (elem (i, i) == static_cast (0.0)) + { + info = -1; + return *this; + } + else + retval.elem (i, i) = static_cast (1.0) / elem (i, i); + } + + return retval; +} + +// diagonal matrix by diagonal matrix -> diagonal matrix operations + +FloatComplexDiagMatrix& +FloatComplexDiagMatrix::operator += (const FloatDiagMatrix& a) +{ + octave_idx_type r = rows (); + octave_idx_type c = cols (); + + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (r != a_nr || c != a_nc) + { + gripe_nonconformant ("operator +=", r, c, a_nr, a_nc); + return *this; + } + + if (r == 0 || c == 0) + return *this; + + FloatComplex *d = fortran_vec (); // Ensures only one reference to my privates! 
+ + mx_inline_add2 (d, a.data (), length ()); + return *this; +} + +FloatComplexDiagMatrix +operator * (const FloatComplexDiagMatrix& a, const FloatDiagMatrix& b) +{ + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + octave_idx_type b_nr = b.rows (); + octave_idx_type b_nc = b.cols (); + + if (a_nc != b_nr) + { + gripe_nonconformant ("operator *", a_nr, a_nc, b_nr, b_nc); + return FloatComplexDiagMatrix (); + } + + if (a_nr == 0 || a_nc == 0 || b_nc == 0) + return FloatComplexDiagMatrix (a_nr, a_nc, 0.0); + + FloatComplexDiagMatrix c (a_nr, b_nc); + + octave_idx_type len = a_nr < b_nc ? a_nr : b_nc; + + for (octave_idx_type i = 0; i < len; i++) + { + FloatComplex a_element = a.elem (i, i); + float b_element = b.elem (i, i); + + if (a_element == static_cast (0.0) || b_element == static_cast (0.0)) + c.elem (i, i) = 0; + else if (a_element == static_cast (1.0)) + c.elem (i, i) = b_element; + else if (b_element == static_cast (1.0)) + c.elem (i, i) = a_element; + else + c.elem (i, i) = a_element * b_element; + } + + return c; +} + +FloatComplexDiagMatrix +operator * (const FloatDiagMatrix& a, const FloatComplexDiagMatrix& b) +{ + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + octave_idx_type b_nr = b.rows (); + octave_idx_type b_nc = b.cols (); + + if (a_nc != b_nr) + { + gripe_nonconformant ("operator *", a_nr, a_nc, b_nr, b_nc); + return FloatComplexDiagMatrix (); + } + + if (a_nr == 0 || a_nc == 0 || b_nc == 0) + return FloatComplexDiagMatrix (a_nr, a_nc, 0.0); + + FloatComplexDiagMatrix c (a_nr, b_nc); + + octave_idx_type len = a_nr < b_nc ? 
a_nr : b_nc; + + for (octave_idx_type i = 0; i < len; i++) + { + float a_element = a.elem (i, i); + FloatComplex b_element = b.elem (i, i); + + if (a_element == static_cast (0.0) || b_element == static_cast (0.0)) + c.elem (i, i) = 0; + else if (a_element == static_cast (1.0)) + c.elem (i, i) = b_element; + else if (b_element == static_cast (1.0)) + c.elem (i, i) = a_element; + else + c.elem (i, i) = a_element * b_element; + } + + return c; +} + +// other operations + +FloatComplexColumnVector +FloatComplexDiagMatrix::diag (octave_idx_type k) const +{ + octave_idx_type nnr = rows (); + octave_idx_type nnc = cols (); + if (k > 0) + nnc -= k; + else if (k < 0) + nnr += k; + + FloatComplexColumnVector d; + + if (nnr > 0 && nnc > 0) + { + octave_idx_type ndiag = (nnr < nnc) ? nnr : nnc; + + d.resize (ndiag); + + if (k > 0) + { + for (octave_idx_type i = 0; i < ndiag; i++) + d.elem (i) = elem (i, i+k); + } + else if ( k < 0) + { + for (octave_idx_type i = 0; i < ndiag; i++) + d.elem (i) = elem (i-k, i); + } + else + { + for (octave_idx_type i = 0; i < ndiag; i++) + d.elem (i) = elem (i, i); + } + } + else + (*current_liboctave_error_handler) + ("diag: requested diagonal out of range"); + + return d; +} + +// i/o + +std::ostream& +operator << (std::ostream& os, const FloatComplexDiagMatrix& a) +{ + FloatComplex ZERO (0.0); +// int field_width = os.precision () + 7; + for (octave_idx_type i = 0; i < a.rows (); i++) + { + for (octave_idx_type j = 0; j < a.cols (); j++) + { + if (i == j) + os << " " /* setw (field_width) */ << a.elem (i, i); + else + os << " " /* setw (field_width) */ << ZERO; + } + os << "\n"; + } + return os; +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCDiagMatrix.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCDiagMatrix.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,148 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2004, 2005, 2007 + John W. 
Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_FloatComplexDiagMatrix_h) +#define octave_FloatComplexDiagMatrix_h 1 + +#include "MDiagArray2.h" + +#include "fRowVector.h" +#include "fCRowVector.h" +#include "fColVector.h" +#include "fCColVector.h" + +#include "mx-defs.h" + +class +FloatComplexDiagMatrix : public MDiagArray2 +{ +public: + + FloatComplexDiagMatrix (void) : MDiagArray2 () { } + + FloatComplexDiagMatrix (octave_idx_type r, octave_idx_type c) : MDiagArray2 (r, c) { } + + FloatComplexDiagMatrix (octave_idx_type r, octave_idx_type c, const FloatComplex& val) + : MDiagArray2 (r, c, val) { } + + explicit FloatComplexDiagMatrix (const FloatRowVector& a) + : MDiagArray2 (FloatComplexRowVector (a)) { } + + explicit FloatComplexDiagMatrix (const FloatComplexRowVector& a) + : MDiagArray2 (a) { } + + explicit FloatComplexDiagMatrix (const FloatColumnVector& a) + : MDiagArray2 (FloatComplexColumnVector (a)) { } + + explicit FloatComplexDiagMatrix (const FloatComplexColumnVector& a) + : MDiagArray2 (a) { } + + explicit FloatComplexDiagMatrix (const FloatDiagMatrix& a); + + FloatComplexDiagMatrix (const MDiagArray2& a) + : MDiagArray2 (a) { } + + FloatComplexDiagMatrix (const FloatComplexDiagMatrix& a) + : MDiagArray2 (a) { } + + FloatComplexDiagMatrix& operator = (const FloatComplexDiagMatrix& a) + { + MDiagArray2::operator = (a); + return *this; + } + + 
bool operator == (const FloatComplexDiagMatrix& a) const; + bool operator != (const FloatComplexDiagMatrix& a) const; + + FloatComplexDiagMatrix& fill (float val); + FloatComplexDiagMatrix& fill (const FloatComplex& val); + FloatComplexDiagMatrix& fill (float val, octave_idx_type beg, octave_idx_type end); + FloatComplexDiagMatrix& fill (const FloatComplex& val, octave_idx_type beg, octave_idx_type end); + FloatComplexDiagMatrix& fill (const FloatColumnVector& a); + FloatComplexDiagMatrix& fill (const FloatComplexColumnVector& a); + FloatComplexDiagMatrix& fill (const FloatRowVector& a); + FloatComplexDiagMatrix& fill (const FloatComplexRowVector& a); + FloatComplexDiagMatrix& fill (const FloatColumnVector& a, octave_idx_type beg); + FloatComplexDiagMatrix& fill (const FloatComplexColumnVector& a, octave_idx_type beg); + FloatComplexDiagMatrix& fill (const FloatRowVector& a, octave_idx_type beg); + FloatComplexDiagMatrix& fill (const FloatComplexRowVector& a, octave_idx_type beg); + + FloatComplexDiagMatrix hermitian (void) const { return MDiagArray2::hermitian (std::conj); } + FloatComplexDiagMatrix transpose (void) const { return MDiagArray2::transpose(); } + + friend FloatComplexDiagMatrix conj (const FloatComplexDiagMatrix& a); + + // resize is the destructive analog for this one + + FloatComplexMatrix extract (octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) const; + + // extract row or column i + + FloatComplexRowVector row (octave_idx_type i) const; + FloatComplexRowVector row (char *s) const; + + FloatComplexColumnVector column (octave_idx_type i) const; + FloatComplexColumnVector column (char *s) const; + + FloatComplexDiagMatrix inverse (int& info) const; + FloatComplexDiagMatrix inverse (void) const; + + // diagonal matrix by diagonal matrix -> diagonal matrix operations + + FloatComplexDiagMatrix& operator += (const FloatDiagMatrix& a); + FloatComplexDiagMatrix& operator -= (const FloatDiagMatrix& a); + + // other 
operations + + FloatComplexColumnVector diag (octave_idx_type k = 0) const; + + // i/o + + friend std::ostream& operator << (std::ostream& os, const FloatComplexDiagMatrix& a); + +private: + + FloatComplexDiagMatrix (FloatComplex *d, octave_idx_type nr, octave_idx_type nc) + : MDiagArray2 (d, nr, nc) { } +}; + +// diagonal matrix by diagonal matrix -> diagonal matrix operations + +FloatComplexDiagMatrix +operator * (const FloatComplexDiagMatrix& a, const FloatComplexDiagMatrix& b); + +FloatComplexDiagMatrix +operator * (const FloatComplexDiagMatrix& a, const FloatDiagMatrix& b); + +FloatComplexDiagMatrix +operator * (const FloatDiagMatrix& a, const FloatComplexDiagMatrix& b); + +MDIAGARRAY2_FORWARD_DEFS (MDiagArray2, FloatComplexDiagMatrix, FloatComplex) + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCMatrix.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCMatrix.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,4071 @@ +// Matrix manipulations. +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, + 2003, 2004, 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include +#include + +// FIXME +#ifdef HAVE_SYS_TYPES_H +#include +#endif + +#include "Array-util.h" +#include "fCMatrix.h" +#include "fCmplxDET.h" +#include "fCmplxSCHUR.h" +#include "fCmplxSVD.h" +#include "fCmplxCHOL.h" +#include "f77-fcn.h" +#include "functor.h" +#include "lo-error.h" +#include "lo-ieee.h" +#include "lo-mappers.h" +#include "lo-utils.h" +#include "mx-base.h" +#include "mx-fcm-fdm.h" +#include "mx-fdm-fcm.h" +#include "mx-fcm-fs.h" +#include "mx-inlines.cc" +#include "oct-cmplx.h" + +#if defined (HAVE_FFTW3) +#include "oct-fftw.h" +#endif + +// Fortran functions we call. + +extern "C" +{ + F77_RET_T + F77_FUNC (xilaenv, XILAENV) (const octave_idx_type&, F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, + const octave_idx_type&, const octave_idx_type&, + octave_idx_type& + F77_CHAR_ARG_LEN_DECL F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (cgebal, CGEBAL) (F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, FloatComplex*, const octave_idx_type&, octave_idx_type&, + octave_idx_type&, float*, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (sgebak, SGEBAK) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, float*, + const octave_idx_type&, float*, const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (cgemm, CGEMM) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, + const FloatComplex&, const FloatComplex*, const octave_idx_type&, + const FloatComplex*, const octave_idx_type&, const FloatComplex&, + FloatComplex*, const octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (cgemv, CGEMV) (F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, 
const FloatComplex&, + const FloatComplex*, const octave_idx_type&, const FloatComplex*, + const octave_idx_type&, const FloatComplex&, FloatComplex*, const octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (xcdotu, XCDOTU) (const octave_idx_type&, const FloatComplex*, const octave_idx_type&, + const FloatComplex*, const octave_idx_type&, FloatComplex&); + + F77_RET_T + F77_FUNC (cgetrf, CGETRF) (const octave_idx_type&, const octave_idx_type&, FloatComplex*, const octave_idx_type&, + octave_idx_type*, octave_idx_type&); + + F77_RET_T + F77_FUNC (cgetrs, CGETRS) (F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, FloatComplex*, const octave_idx_type&, + const octave_idx_type*, FloatComplex*, const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (cgetri, CGETRI) (const octave_idx_type&, FloatComplex*, const octave_idx_type&, const octave_idx_type*, + FloatComplex*, const octave_idx_type&, octave_idx_type&); + + F77_RET_T + F77_FUNC (cgecon, CGECON) (F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, FloatComplex*, + const octave_idx_type&, const float&, float&, + FloatComplex*, float*, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (cgelsy, CGELSY) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, + FloatComplex*, const octave_idx_type&, FloatComplex*, + const octave_idx_type&, octave_idx_type*, float&, octave_idx_type&, + FloatComplex*, const octave_idx_type&, float*, octave_idx_type&); + + F77_RET_T + F77_FUNC (cgelsd, CGELSD) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, + FloatComplex*, const octave_idx_type&, FloatComplex*, + const octave_idx_type&, float*, float&, octave_idx_type&, + FloatComplex*, const octave_idx_type&, float*, + octave_idx_type*, octave_idx_type&); + + F77_RET_T + F77_FUNC (cpotrf, CPOTRF) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + FloatComplex*, const octave_idx_type&, + 
octave_idx_type& F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (cpocon, CPOCON) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + FloatComplex*, const octave_idx_type&, const float&, + float&, FloatComplex*, float*, + octave_idx_type& F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (cpotrs, CPOTRS) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + const octave_idx_type&, const FloatComplex*, + const octave_idx_type&, FloatComplex*, + const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (ctrtri, CTRTRI) (F77_CONST_CHAR_ARG_DECL, F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const FloatComplex*, + const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (ctrcon, CTRCON) (F77_CONST_CHAR_ARG_DECL, F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + const FloatComplex*, const octave_idx_type&, float&, + FloatComplex*, float*, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (ctrtrs, CTRTRS) (F77_CONST_CHAR_ARG_DECL, F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + const octave_idx_type&, const FloatComplex*, + const octave_idx_type&, FloatComplex*, + const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (cffti, CFFTI) (const octave_idx_type&, FloatComplex*); + + F77_RET_T + F77_FUNC (cfftf, CFFTF) (const octave_idx_type&, FloatComplex*, FloatComplex*); + + F77_RET_T + F77_FUNC (cfftb, CFFTB) (const octave_idx_type&, FloatComplex*, FloatComplex*); + + F77_RET_T + F77_FUNC (clartg, CLARTG) (const FloatComplex&, const FloatComplex&, + float&, FloatComplex&, FloatComplex&); + + F77_RET_T + F77_FUNC (ctrsyl, CTRSYL) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, + const 
FloatComplex*, const octave_idx_type&, + const FloatComplex*, const octave_idx_type&, + const FloatComplex*, const octave_idx_type&, float&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (xclange, XCLANGE) (F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const FloatComplex*, + const octave_idx_type&, float*, float& + F77_CHAR_ARG_LEN_DECL); +} + +static const FloatComplex FloatComplex_NaN_result (octave_Float_NaN, octave_Float_NaN); + +// FloatComplex Matrix class + +FloatComplexMatrix::FloatComplexMatrix (const FloatMatrix& a) + : MArray2 (a.rows (), a.cols ()) +{ + for (octave_idx_type j = 0; j < cols (); j++) + for (octave_idx_type i = 0; i < rows (); i++) + elem (i, j) = a.elem (i, j); +} + +FloatComplexMatrix::FloatComplexMatrix (const FloatRowVector& rv) + : MArray2 (1, rv.length (), 0.0) +{ + for (octave_idx_type i = 0; i < rv.length (); i++) + elem (0, i) = rv.elem (i); +} + +FloatComplexMatrix::FloatComplexMatrix (const FloatColumnVector& cv) + : MArray2 (cv.length (), 1, 0.0) +{ + for (octave_idx_type i = 0; i < cv.length (); i++) + elem (i, 0) = cv.elem (i); +} + +FloatComplexMatrix::FloatComplexMatrix (const FloatDiagMatrix& a) + : MArray2 (a.rows (), a.cols (), 0.0) +{ + for (octave_idx_type i = 0; i < a.length (); i++) + elem (i, i) = a.elem (i, i); +} + +FloatComplexMatrix::FloatComplexMatrix (const FloatComplexRowVector& rv) + : MArray2 (1, rv.length (), 0.0) +{ + for (octave_idx_type i = 0; i < rv.length (); i++) + elem (0, i) = rv.elem (i); +} + +FloatComplexMatrix::FloatComplexMatrix (const FloatComplexColumnVector& cv) + : MArray2 (cv.length (), 1, 0.0) +{ + for (octave_idx_type i = 0; i < cv.length (); i++) + elem (i, 0) = cv.elem (i); +} + +FloatComplexMatrix::FloatComplexMatrix (const FloatComplexDiagMatrix& a) + : MArray2 (a.rows (), a.cols (), 0.0) +{ + for (octave_idx_type i = 0; i < a.length (); i++) + elem (i, i) = a.elem (i, i); +} + +// FIXME -- could we use a 
templated mixed-type copy function +// here? + +FloatComplexMatrix::FloatComplexMatrix (const boolMatrix& a) + : MArray2 (a.rows (), a.cols (), 0.0) +{ + for (octave_idx_type i = 0; i < a.rows (); i++) + for (octave_idx_type j = 0; j < a.cols (); j++) + elem (i, j) = a.elem (i, j); +} + +FloatComplexMatrix::FloatComplexMatrix (const charMatrix& a) + : MArray2 (a.rows (), a.cols (), 0.0) +{ + for (octave_idx_type i = 0; i < a.rows (); i++) + for (octave_idx_type j = 0; j < a.cols (); j++) + elem (i, j) = a.elem (i, j); +} + +bool +FloatComplexMatrix::operator == (const FloatComplexMatrix& a) const +{ + if (rows () != a.rows () || cols () != a.cols ()) + return false; + + return mx_inline_equal (data (), a.data (), length ()); +} + +bool +FloatComplexMatrix::operator != (const FloatComplexMatrix& a) const +{ + return !(*this == a); +} + +bool +FloatComplexMatrix::is_hermitian (void) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (is_square () && nr > 0) + { + for (octave_idx_type i = 0; i < nr; i++) + for (octave_idx_type j = i; j < nc; j++) + if (elem (i, j) != conj (elem (j, i))) + return false; + + return true; + } + + return false; +} + +// destructive insert/delete/reorder operations + +FloatComplexMatrix& +FloatComplexMatrix::insert (const FloatMatrix& a, octave_idx_type r, octave_idx_type c) +{ + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (r < 0 || r + a_nr > rows () || c < 0 || c + a_nc > cols ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + if (a_nr >0 && a_nc > 0) + { + make_unique (); + + for (octave_idx_type j = 0; j < a_nc; j++) + for (octave_idx_type i = 0; i < a_nr; i++) + xelem (r+i, c+j) = a.elem (i, j); + } + + return *this; +} + +FloatComplexMatrix& +FloatComplexMatrix::insert (const FloatRowVector& a, octave_idx_type r, octave_idx_type c) +{ + octave_idx_type a_len = a.length (); + + if (r < 0 || r >= rows () || c < 0 || c + a_len > 
cols ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (r, c+i) = a.elem (i); + } + + return *this; +} + +FloatComplexMatrix& +FloatComplexMatrix::insert (const FloatColumnVector& a, octave_idx_type r, octave_idx_type c) +{ + octave_idx_type a_len = a.length (); + + if (r < 0 || r + a_len > rows () || c < 0 || c >= cols ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (r+i, c) = a.elem (i); + } + + return *this; +} + +FloatComplexMatrix& +FloatComplexMatrix::insert (const FloatDiagMatrix& a, octave_idx_type r, octave_idx_type c) +{ + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (r < 0 || r + a_nr > rows () || c < 0 || c + a_nc > cols ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + fill (0.0, r, c, r + a_nr - 1, c + a_nc - 1); + + octave_idx_type a_len = a.length (); + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (r+i, c+i) = a.elem (i, i); + } + + return *this; +} + +FloatComplexMatrix& +FloatComplexMatrix::insert (const FloatComplexMatrix& a, octave_idx_type r, octave_idx_type c) +{ + Array2::insert (a, r, c); + return *this; +} + +FloatComplexMatrix& +FloatComplexMatrix::insert (const FloatComplexRowVector& a, octave_idx_type r, octave_idx_type c) +{ + octave_idx_type a_len = a.length (); + if (r < 0 || r >= rows () || c < 0 || c + a_len > cols ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + for (octave_idx_type i = 0; i < a_len; i++) + elem (r, c+i) = a.elem (i); + + return *this; +} + +FloatComplexMatrix& +FloatComplexMatrix::insert (const FloatComplexColumnVector& a, octave_idx_type r, octave_idx_type c) +{ 
+ octave_idx_type a_len = a.length (); + + if (r < 0 || r + a_len > rows () || c < 0 || c >= cols ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (r+i, c) = a.elem (i); + } + + return *this; +} + +FloatComplexMatrix& +FloatComplexMatrix::insert (const FloatComplexDiagMatrix& a, octave_idx_type r, octave_idx_type c) +{ + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (r < 0 || r + a_nr > rows () || c < 0 || c + a_nc > cols ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + fill (0.0, r, c, r + a_nr - 1, c + a_nc - 1); + + octave_idx_type a_len = a.length (); + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (r+i, c+i) = a.elem (i, i); + } + + return *this; +} + +FloatComplexMatrix& +FloatComplexMatrix::fill (float val) +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + make_unique (); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + xelem (i, j) = val; + } + + return *this; +} + +FloatComplexMatrix& +FloatComplexMatrix::fill (const FloatComplex& val) +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + make_unique (); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + xelem (i, j) = val; + } + + return *this; +} + +FloatComplexMatrix& +FloatComplexMatrix::fill (float val, octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (r1 < 0 || r2 < 0 || c1 < 0 || c2 < 0 + || r1 >= nr || r2 >= nr || c1 >= nc || c2 >= nc) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + if (r1 > r2) { octave_idx_type tmp = 
r1; r1 = r2; r2 = tmp; } + if (c1 > c2) { octave_idx_type tmp = c1; c1 = c2; c2 = tmp; } + + if (r2 >= r1 && c2 >= c1) + { + make_unique (); + + for (octave_idx_type j = c1; j <= c2; j++) + for (octave_idx_type i = r1; i <= r2; i++) + xelem (i, j) = val; + } + + return *this; +} + +FloatComplexMatrix& +FloatComplexMatrix::fill (const FloatComplex& val, octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (r1 < 0 || r2 < 0 || c1 < 0 || c2 < 0 + || r1 >= nr || r2 >= nr || c1 >= nc || c2 >= nc) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + if (r1 > r2) { octave_idx_type tmp = r1; r1 = r2; r2 = tmp; } + if (c1 > c2) { octave_idx_type tmp = c1; c1 = c2; c2 = tmp; } + + if (r2 >= r1 && c2 >=c1) + { + make_unique (); + + for (octave_idx_type j = c1; j <= c2; j++) + for (octave_idx_type i = r1; i <= r2; i++) + xelem (i, j) = val; + } + + return *this; +} + +FloatComplexMatrix +FloatComplexMatrix::append (const FloatMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != a.rows ()) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return *this; + } + + octave_idx_type nc_insert = nc; + FloatComplexMatrix retval (nr, nc + a.cols ()); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::append (const FloatRowVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != 1) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return *this; + } + + octave_idx_type nc_insert = nc; + FloatComplexMatrix retval (nr, nc + a.length ()); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::append (const FloatColumnVector& a) const +{ + octave_idx_type nr = rows 
(); + octave_idx_type nc = cols (); + if (nr != a.length ()) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return *this; + } + + octave_idx_type nc_insert = nc; + FloatComplexMatrix retval (nr, nc + 1); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::append (const FloatDiagMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != a.rows ()) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return *this; + } + + octave_idx_type nc_insert = nc; + FloatComplexMatrix retval (nr, nc + a.cols ()); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::append (const FloatComplexMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != a.rows ()) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return *this; + } + + octave_idx_type nc_insert = nc; + FloatComplexMatrix retval (nr, nc + a.cols ()); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::append (const FloatComplexRowVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != 1) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return *this; + } + + octave_idx_type nc_insert = nc; + FloatComplexMatrix retval (nr, nc + a.length ()); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::append (const FloatComplexColumnVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != a.length ()) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return *this; + } + + octave_idx_type nc_insert = nc; + 
FloatComplexMatrix retval (nr, nc + 1); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::append (const FloatComplexDiagMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != a.rows ()) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return *this; + } + + octave_idx_type nc_insert = nc; + FloatComplexMatrix retval (nr, nc + a.cols ()); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::stack (const FloatMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != a.cols ()) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return *this; + } + + octave_idx_type nr_insert = nr; + FloatComplexMatrix retval (nr + a.rows (), nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::stack (const FloatRowVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != a.length ()) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return *this; + } + + octave_idx_type nr_insert = nr; + FloatComplexMatrix retval (nr + 1, nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::stack (const FloatColumnVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != 1) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return *this; + } + + octave_idx_type nr_insert = nr; + FloatComplexMatrix retval (nr + a.length (), nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::stack (const FloatDiagMatrix& a) const +{ + 
octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != a.cols ()) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return *this; + } + + octave_idx_type nr_insert = nr; + FloatComplexMatrix retval (nr + a.rows (), nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::stack (const FloatComplexMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != a.cols ()) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return *this; + } + + octave_idx_type nr_insert = nr; + FloatComplexMatrix retval (nr + a.rows (), nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::stack (const FloatComplexRowVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != a.length ()) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return *this; + } + + octave_idx_type nr_insert = nr; + FloatComplexMatrix retval (nr + 1, nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::stack (const FloatComplexColumnVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != 1) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return *this; + } + + octave_idx_type nr_insert = nr; + FloatComplexMatrix retval (nr + a.length (), nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::stack (const FloatComplexDiagMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != a.cols ()) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return *this; + } + + 
octave_idx_type nr_insert = nr; + FloatComplexMatrix retval (nr + a.rows (), nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatComplexMatrix +conj (const FloatComplexMatrix& a) +{ + octave_idx_type a_len = a.length (); + FloatComplexMatrix retval; + if (a_len > 0) + retval = FloatComplexMatrix (mx_inline_conj_dup (a.data (), a_len), + a.rows (), a.cols ()); + return retval; +} + +// resize is the destructive equivalent for this one + +FloatComplexMatrix +FloatComplexMatrix::extract (octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) const +{ + if (r1 > r2) { octave_idx_type tmp = r1; r1 = r2; r2 = tmp; } + if (c1 > c2) { octave_idx_type tmp = c1; c1 = c2; c2 = tmp; } + + octave_idx_type new_r = r2 - r1 + 1; + octave_idx_type new_c = c2 - c1 + 1; + + FloatComplexMatrix result (new_r, new_c); + + for (octave_idx_type j = 0; j < new_c; j++) + for (octave_idx_type i = 0; i < new_r; i++) + result.xelem (i, j) = elem (r1+i, c1+j); + + return result; +} + +FloatComplexMatrix +FloatComplexMatrix::extract_n (octave_idx_type r1, octave_idx_type c1, octave_idx_type nr, octave_idx_type nc) const +{ + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + result.xelem (i, j) = elem (r1+i, c1+j); + + return result; +} + +// extract row or column i. 
+ +FloatComplexRowVector +FloatComplexMatrix::row (octave_idx_type i) const +{ + octave_idx_type nc = cols (); + if (i < 0 || i >= rows ()) + { + (*current_liboctave_error_handler) ("invalid row selection"); + return FloatComplexRowVector (); + } + + FloatComplexRowVector retval (nc); + for (octave_idx_type j = 0; j < cols (); j++) + retval.xelem (j) = elem (i, j); + + return retval; +} + +FloatComplexColumnVector +FloatComplexMatrix::column (octave_idx_type i) const +{ + octave_idx_type nr = rows (); + if (i < 0 || i >= cols ()) + { + (*current_liboctave_error_handler) ("invalid column selection"); + return FloatComplexColumnVector (); + } + + FloatComplexColumnVector retval (nr); + for (octave_idx_type j = 0; j < nr; j++) + retval.xelem (j) = elem (j, i); + + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::inverse (void) const +{ + octave_idx_type info; + float rcond; + MatrixType mattype (*this); + return inverse (mattype, info, rcond, 0, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::inverse (octave_idx_type& info) const +{ + float rcond; + MatrixType mattype (*this); + return inverse (mattype, info, rcond, 0, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::inverse (octave_idx_type& info, float& rcond, int force, + int calc_cond) const +{ + MatrixType mattype (*this); + return inverse (mattype, info, rcond, force, calc_cond); +} + +FloatComplexMatrix +FloatComplexMatrix::inverse (MatrixType &mattype) const +{ + octave_idx_type info; + float rcond; + return inverse (mattype, info, rcond, 0, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::inverse (MatrixType &mattype, octave_idx_type& info) const +{ + float rcond; + return inverse (mattype, info, rcond, 0, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::tinverse (MatrixType &mattype, octave_idx_type& info, + float& rcond, int force, int calc_cond) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr != nc || nr == 0 || nc == 0) + 
(*current_liboctave_error_handler) ("inverse requires square matrix"); + else + { + int typ = mattype.type (); + char uplo = (typ == MatrixType::Lower ? 'L' : 'U'); + char udiag = 'N'; + retval = *this; + FloatComplex *tmp_data = retval.fortran_vec (); + + F77_XFCN (ctrtri, CTRTRI, (F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&udiag, 1), + nr, tmp_data, nr, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + // Throw-away extra info LAPACK gives so as to not change output. + rcond = 0.0; + if (info != 0) + info = -1; + else if (calc_cond) + { + octave_idx_type ztrcon_info = 0; + char job = '1'; + + OCTAVE_LOCAL_BUFFER (FloatComplex, cwork, 2*nr); + OCTAVE_LOCAL_BUFFER (float, rwork, nr); + + F77_XFCN (ctrcon, CTRCON, (F77_CONST_CHAR_ARG2 (&job, 1), + F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&udiag, 1), + nr, tmp_data, nr, rcond, + cwork, rwork, ztrcon_info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (ztrcon_info != 0) + info = -1; + } + + if (info == -1 && ! force) + retval = *this; // Restore matrix contents. + } + + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::finverse (MatrixType &mattype, octave_idx_type& info, + float& rcond, int force, int calc_cond) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr != nc) + (*current_liboctave_error_handler) ("inverse requires square matrix"); + else + { + Array ipvt (nr); + octave_idx_type *pipvt = ipvt.fortran_vec (); + + retval = *this; + FloatComplex *tmp_data = retval.fortran_vec (); + + Array z(1); + octave_idx_type lwork = -1; + + // Query the optimum work array size. + + F77_XFCN (cgetri, CGETRI, (nc, tmp_data, nr, pipvt, + z.fortran_vec (), lwork, info)); + + lwork = static_cast (std::real(z(0))); + lwork = (lwork < 2 *nc ? 2*nc : lwork); + z.resize (lwork); + FloatComplex *pz = z.fortran_vec (); + + info = 0; + + // Calculate the norm of the matrix, for later use. 
+ float anorm; + if (calc_cond) + anorm = retval.abs().sum().row(static_cast(0)).max(); + + F77_XFCN (cgetrf, CGETRF, (nc, nc, tmp_data, nr, pipvt, info)); + + // Throw-away extra info LAPACK gives so as to not change output. + rcond = 0.0; + if (info != 0) + info = -1; + else if (calc_cond) + { + // Now calculate the condition number for non-singular matrix. + octave_idx_type zgecon_info = 0; + char job = '1'; + Array rz (2 * nc); + float *prz = rz.fortran_vec (); + F77_XFCN (cgecon, CGECON, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, tmp_data, nr, anorm, + rcond, pz, prz, zgecon_info + F77_CHAR_ARG_LEN (1))); + + if (zgecon_info != 0) + info = -1; + } + + if (info == -1 && ! force) + retval = *this; // Restore contents. + else + { + octave_idx_type zgetri_info = 0; + + F77_XFCN (cgetri, CGETRI, (nc, tmp_data, nr, pipvt, + pz, lwork, zgetri_info)); + + if (zgetri_info != 0) + info = -1; + } + + if (info != 0) + mattype.mark_as_rectangular(); + } + + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::inverse (MatrixType &mattype, octave_idx_type& info, + float& rcond, int force, int calc_cond) const +{ + int typ = mattype.type (false); + FloatComplexMatrix ret; + + if (typ == MatrixType::Unknown) + typ = mattype.type (*this); + + if (typ == MatrixType::Upper || typ == MatrixType::Lower) + ret = tinverse (mattype, info, rcond, force, calc_cond); + else + { + if (mattype.is_hermitian ()) + { + FloatComplexCHOL chol (*this, info, calc_cond); + if (info == 0) + { + if (calc_cond) + rcond = chol.rcond(); + else + rcond = 1.0; + ret = chol.inverse (); + } + else + mattype.mark_as_unsymmetric (); + } + + if (!mattype.is_hermitian ()) + ret = finverse(mattype, info, rcond, force, calc_cond); + + if ((mattype.is_hermitian () || calc_cond) && rcond == 0.) 
+ ret = FloatComplexMatrix (rows (), columns (), FloatComplex (octave_Float_Inf, 0.)); + } + + return ret; +} + +FloatComplexMatrix +FloatComplexMatrix::pseudo_inverse (float tol) const +{ + FloatComplexMatrix retval; + + FloatComplexSVD result (*this, SVD::economy); + + FloatDiagMatrix S = result.singular_values (); + FloatComplexMatrix U = result.left_singular_matrix (); + FloatComplexMatrix V = result.right_singular_matrix (); + + FloatColumnVector sigma = S.diag (); + + octave_idx_type r = sigma.length () - 1; + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (tol <= 0.0) + { + if (nr > nc) + tol = nr * sigma.elem (0) * FLT_EPSILON; + else + tol = nc * sigma.elem (0) * FLT_EPSILON; + } + + while (r >= 0 && sigma.elem (r) < tol) + r--; + + if (r < 0) + retval = FloatComplexMatrix (nc, nr, 0.0); + else + { + FloatComplexMatrix Ur = U.extract (0, 0, nr-1, r); + FloatDiagMatrix D = FloatDiagMatrix (sigma.extract (0, r)) . inverse (); + FloatComplexMatrix Vr = V.extract (0, 0, nc-1, r); + retval = Vr * D * Ur.hermitian (); + } + + return retval; +} + +#if defined (HAVE_FFTW3) + +FloatComplexMatrix +FloatComplexMatrix::fourier (void) const +{ + size_t nr = rows (); + size_t nc = cols (); + + FloatComplexMatrix retval (nr, nc); + + size_t npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + const FloatComplex *in (data ()); + FloatComplex *out (retval.fortran_vec ()); + + octave_fftw::fft (in, out, npts, nsamples); + + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::ifourier (void) const +{ + size_t nr = rows (); + size_t nc = cols (); + + FloatComplexMatrix retval (nr, nc); + + size_t npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ?
nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + const FloatComplex *in (data ()); + FloatComplex *out (retval.fortran_vec ()); + + octave_fftw::ifft (in, out, npts, nsamples); + + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::fourier2d (void) const +{ + dim_vector dv(rows (), cols ()); + + FloatComplexMatrix retval (rows (), cols ()); + const FloatComplex *in (data ()); + FloatComplex *out (retval.fortran_vec ()); + + octave_fftw::fftNd (in, out, 2, dv); + + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::ifourier2d (void) const +{ + dim_vector dv(rows (), cols ()); + + FloatComplexMatrix retval (rows (), cols ()); + const FloatComplex *in (data ()); + FloatComplex *out (retval.fortran_vec ()); + + octave_fftw::ifftNd (in, out, 2, dv); + + return retval; +} + +#else + +FloatComplexMatrix +FloatComplexMatrix::fourier (void) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + octave_idx_type nn = 4*npts+15; + + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + retval = *this; + FloatComplex *tmp_data = retval.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + F77_FUNC (cfftf, CFFTF) (npts, &tmp_data[npts*j], pwsave); + } + + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::ifourier (void) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? 
nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + octave_idx_type nn = 4*npts+15; + + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + retval = *this; + FloatComplex *tmp_data = retval.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + F77_FUNC (cfftb, CFFTB) (npts, &tmp_data[npts*j], pwsave); + } + + for (octave_idx_type j = 0; j < npts*nsamples; j++) + tmp_data[j] = tmp_data[j] / static_cast (npts); + + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::fourier2d (void) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + octave_idx_type nn = 4*npts+15; + + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + retval = *this; + FloatComplex *tmp_data = retval.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + F77_FUNC (cfftf, CFFTF) (npts, &tmp_data[npts*j], pwsave); + } + + npts = nc; + nsamples = nr; + nn = 4*npts+15; + + wsave.resize (nn); + pwsave = wsave.fortran_vec (); + + Array tmp (npts); + FloatComplex *prow = tmp.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type i = 0; i < npts; i++) + prow[i] = tmp_data[i*nr + j]; + + F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); + + for (octave_idx_type i = 0; i < npts; i++) + tmp_data[i*nr + j] = prow[i]; + } + + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::ifourier2d (void) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = 
nr > nc ? nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + octave_idx_type nn = 4*npts+15; + + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + retval = *this; + FloatComplex *tmp_data = retval.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + F77_FUNC (cfftb, CFFTB) (npts, &tmp_data[npts*j], pwsave); + } + + for (octave_idx_type j = 0; j < npts*nsamples; j++) + tmp_data[j] = tmp_data[j] / static_cast (npts); + + npts = nc; + nsamples = nr; + nn = 4*npts+15; + + wsave.resize (nn); + pwsave = wsave.fortran_vec (); + + Array tmp (npts); + FloatComplex *prow = tmp.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type i = 0; i < npts; i++) + prow[i] = tmp_data[i*nr + j]; + + F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); + + for (octave_idx_type i = 0; i < npts; i++) + tmp_data[i*nr + j] = prow[i] / static_cast (npts); + } + + return retval; +} + +#endif + +FloatComplexDET +FloatComplexMatrix::determinant (void) const +{ + octave_idx_type info; + float rcond; + return determinant (info, rcond, 0); +} + +FloatComplexDET +FloatComplexMatrix::determinant (octave_idx_type& info) const +{ + float rcond; + return determinant (info, rcond, 0); +} + +FloatComplexDET +FloatComplexMatrix::determinant (octave_idx_type& info, float& rcond, int calc_cond) const +{ + FloatComplexDET retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr == 0 || nc == 0) + { + retval = FloatComplexDET (1.0, 0); + } + else + { + Array ipvt (nr); + octave_idx_type *pipvt = ipvt.fortran_vec (); + + FloatComplexMatrix atmp = *this; + FloatComplex *tmp_data = atmp.fortran_vec (); + + info = 0; + + // Calculate the norm of the matrix, for later use. 
+ float anorm = 0; + if (calc_cond) + anorm = atmp.abs().sum().row(static_cast(0)).max(); + + F77_XFCN (cgetrf, CGETRF, (nr, nc, tmp_data, nr, pipvt, info)); + + // Throw-away extra info LAPACK gives so as to not change output. + rcond = 0.0; + if (info != 0) + { + info = -1; + retval = FloatComplexDET (); + } + else + { + if (calc_cond) + { + // Now calc the condition number for non-singular matrix. + char job = '1'; + Array z (2*nr); + FloatComplex *pz = z.fortran_vec (); + Array rz (2*nr); + float *prz = rz.fortran_vec (); + + F77_XFCN (cgecon, CGECON, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, tmp_data, nr, anorm, + rcond, pz, prz, info + F77_CHAR_ARG_LEN (1))); + } + + if (info != 0) + { + info = -1; + retval = FloatComplexDET (); + } + else + { + FloatComplex c = 1.0; + int e = 0; + + for (octave_idx_type i = 0; i < nc; i++) + { + if (ipvt(i) != (i+1)) + c = -c; + + c *= atmp(i,i); + + if (c == static_cast (0.0)) + break; + + while (std::abs(c) < 0.5) + { + c *= 2.0; + e--; + } + + while (std::abs(c) >= 2.0) + { + c /= 2.0; + e++; + } + } + + retval = FloatComplexDET (c, e); + } + } + } + + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::utsolve (MatrixType &mattype, const FloatComplexMatrix& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler, + bool calc_cond) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr != b.rows ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (nr == 0 || nc == 0 || b.cols () == 0) + retval = FloatComplexMatrix (nc, b.cols (), FloatComplex (0.0, 0.0)); + else + { + volatile int typ = mattype.type (); + + if (typ == MatrixType::Permuted_Upper || + typ == MatrixType::Upper) + { + octave_idx_type b_nc = b.cols (); + rcond = 1.; + info = 0; + + if (typ == MatrixType::Permuted_Upper) + { + (*current_liboctave_error_handler) + ("permuted triangular matrix not implemented"); 
+ } + else + { + const FloatComplex *tmp_data = fortran_vec (); + + if (calc_cond) + { + char norm = '1'; + char uplo = 'U'; + char dia = 'N'; + + Array z (2 * nc); + FloatComplex *pz = z.fortran_vec (); + Array rz (nc); + float *prz = rz.fortran_vec (); + + F77_XFCN (ctrcon, CTRCON, (F77_CONST_CHAR_ARG2 (&norm, 1), + F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&dia, 1), + nr, tmp_data, nr, rcond, + pz, prz, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (info != 0) + info = -2; + + volatile float rcond_plus_one = rcond + 1.0; + + if (rcond_plus_one == 1.0 || xisnan (rcond)) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision, rcond = %g", + rcond); + } + } + + if (info == 0) + { + retval = b; + FloatComplex *result = retval.fortran_vec (); + + char uplo = 'U'; + char trans = 'N'; + char dia = 'N'; + + F77_XFCN (ctrtrs, CTRTRS, (F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&trans, 1), + F77_CONST_CHAR_ARG2 (&dia, 1), + nr, b_nc, tmp_data, nr, + result, nr, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + } + } + } + else + (*current_liboctave_error_handler) ("incorrect matrix type"); + } + + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::ltsolve (MatrixType &mattype, const FloatComplexMatrix& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler, + bool calc_cond) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr != b.rows ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (nr == 0 || nc == 0 || b.cols () == 0) + retval = FloatComplexMatrix (nc, b.cols (), FloatComplex (0.0, 0.0)); + else + { + volatile int typ = mattype.type (); + + if (typ == MatrixType::Permuted_Lower || + typ == MatrixType::Lower) + { + 
octave_idx_type b_nc = b.cols (); + rcond = 1.; + info = 0; + + if (typ == MatrixType::Permuted_Lower) + { + (*current_liboctave_error_handler) + ("permuted triangular matrix not implemented"); + } + else + { + const FloatComplex *tmp_data = fortran_vec (); + + if (calc_cond) + { + char norm = '1'; + char uplo = 'L'; + char dia = 'N'; + + Array z (2 * nc); + FloatComplex *pz = z.fortran_vec (); + Array rz (nc); + float *prz = rz.fortran_vec (); + + F77_XFCN (ctrcon, CTRCON, (F77_CONST_CHAR_ARG2 (&norm, 1), + F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&dia, 1), + nr, tmp_data, nr, rcond, + pz, prz, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (info != 0) + info = -2; + + volatile float rcond_plus_one = rcond + 1.0; + + if (rcond_plus_one == 1.0 || xisnan (rcond)) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision, rcond = %g", + rcond); + } + } + + if (info == 0) + { + retval = b; + FloatComplex *result = retval.fortran_vec (); + + char uplo = 'L'; + char trans = 'N'; + char dia = 'N'; + + F77_XFCN (ctrtrs, CTRTRS, (F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&trans, 1), + F77_CONST_CHAR_ARG2 (&dia, 1), + nr, b_nc, tmp_data, nr, + result, nr, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + } + } + } + else + (*current_liboctave_error_handler) ("incorrect matrix type"); + } + + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::fsolve (MatrixType &mattype, const FloatComplexMatrix& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler, + bool calc_cond) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + + if (nr != nc || nr != b.rows ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (nr == 0 || b.cols () == 0) + retval = 
FloatComplexMatrix (nc, b.cols (), FloatComplex (0.0, 0.0)); + else + { + volatile int typ = mattype.type (); + + // Calculate the norm of the matrix, for later use. + float anorm = -1.; + + if (typ == MatrixType::Hermitian) + { + info = 0; + char job = 'L'; + FloatComplexMatrix atmp = *this; + FloatComplex *tmp_data = atmp.fortran_vec (); + anorm = atmp.abs().sum().row(static_cast(0)).max(); + + F77_XFCN (cpotrf, CPOTRF, (F77_CONST_CHAR_ARG2 (&job, 1), nr, + tmp_data, nr, info + F77_CHAR_ARG_LEN (1))); + + // Throw-away extra info LAPACK gives so as to not change output. + rcond = 0.0; + if (info != 0) + { + info = -2; + + mattype.mark_as_unsymmetric (); + typ = MatrixType::Full; + } + else + { + if (calc_cond) + { + Array z (2 * nc); + FloatComplex *pz = z.fortran_vec (); + Array rz (nc); + float *prz = rz.fortran_vec (); + + F77_XFCN (cpocon, CPOCON, (F77_CONST_CHAR_ARG2 (&job, 1), + nr, tmp_data, nr, anorm, + rcond, pz, prz, info + F77_CHAR_ARG_LEN (1))); + + if (info != 0) + info = -2; + + volatile float rcond_plus_one = rcond + 1.0; + + if (rcond_plus_one == 1.0 || xisnan (rcond)) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision, rcond = %g", + rcond); + } + } + + if (info == 0) + { + retval = b; + FloatComplex *result = retval.fortran_vec (); + + octave_idx_type b_nc = b.cols (); + + F77_XFCN (cpotrs, CPOTRS, (F77_CONST_CHAR_ARG2 (&job, 1), + nr, b_nc, tmp_data, nr, + result, b.rows(), info + F77_CHAR_ARG_LEN (1))); + } + else + { + mattype.mark_as_unsymmetric (); + typ = MatrixType::Full; + } + } + } + + if (typ == MatrixType::Full) + { + info = 0; + + Array ipvt (nr); + octave_idx_type *pipvt = ipvt.fortran_vec (); + + FloatComplexMatrix atmp = *this; + FloatComplex *tmp_data = atmp.fortran_vec (); + + Array z (2 * nc); + FloatComplex *pz = z.fortran_vec (); + Array rz (2 * nc); + float *prz = rz.fortran_vec (); + + // Calculate the norm of the matrix, for 
later use. + if (anorm < 0.) + anorm = atmp.abs().sum().row(static_cast(0)).max(); + + F77_XFCN (cgetrf, CGETRF, (nr, nr, tmp_data, nr, pipvt, info)); + + // Throw-away extra info LAPACK gives so as to not change output. + rcond = 0.0; + if (info != 0) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision"); + + mattype.mark_as_rectangular (); + } + else + { + if (calc_cond) + { + // Now calculate the condition number for + // non-singular matrix. + char job = '1'; + F77_XFCN (cgecon, CGECON, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, tmp_data, nr, anorm, + rcond, pz, prz, info + F77_CHAR_ARG_LEN (1))); + + if (info != 0) + info = -2; + + volatile float rcond_plus_one = rcond + 1.0; + + if (rcond_plus_one == 1.0 || xisnan (rcond)) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision, rcond = %g", + rcond); + } + } + + if (info == 0) + { + retval = b; + FloatComplex *result = retval.fortran_vec (); + + octave_idx_type b_nc = b.cols (); + + char job = 'N'; + F77_XFCN (cgetrs, CGETRS, (F77_CONST_CHAR_ARG2 (&job, 1), + nr, b_nc, tmp_data, nr, + pipvt, result, b.rows(), info + F77_CHAR_ARG_LEN (1))); + } + else + mattype.mark_as_rectangular (); + } + } + } + + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::solve (MatrixType &typ, const FloatMatrix& b) const +{ + octave_idx_type info; + float rcond; + return solve (typ, b, info, rcond, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::solve (MatrixType &typ, const FloatMatrix& b, + octave_idx_type& info) const +{ + float rcond; + return solve (typ, b, info, rcond, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::solve (MatrixType &typ, const FloatMatrix& b, octave_idx_type& info, + float& rcond) const +{ + return solve (typ, b, info, rcond, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::solve (MatrixType &typ, const 
FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool singular_fallback) const +{ + FloatComplexMatrix tmp (b); + return solve (typ, tmp, info, rcond, sing_handler, singular_fallback); +} + +FloatComplexMatrix +FloatComplexMatrix::solve (MatrixType &typ, const FloatComplexMatrix& b) const +{ + octave_idx_type info; + float rcond; + return solve (typ, b, info, rcond, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::solve (MatrixType &typ, const FloatComplexMatrix& b, + octave_idx_type& info) const +{ + float rcond; + return solve (typ, b, info, rcond, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::solve (MatrixType &typ, const FloatComplexMatrix& b, + octave_idx_type& info, float& rcond) const +{ + return solve (typ, b, info, rcond, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::solve (MatrixType &mattype, const FloatComplexMatrix& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler, + bool singular_fallback) const +{ + FloatComplexMatrix retval; + int typ = mattype.type (); + + if (typ == MatrixType::Unknown) + typ = mattype.type (*this); + + // Only calculate the condition number for LU/Cholesky + if (typ == MatrixType::Upper || typ == MatrixType::Permuted_Upper) + retval = utsolve (mattype, b, info, rcond, sing_handler, false); + else if (typ == MatrixType::Lower || typ == MatrixType::Permuted_Lower) + retval = ltsolve (mattype, b, info, rcond, sing_handler, false); + else if (typ == MatrixType::Full || typ == MatrixType::Hermitian) + retval = fsolve (mattype, b, info, rcond, sing_handler, true); + else if (typ != MatrixType::Rectangular) + { + (*current_liboctave_error_handler) ("unknown matrix type"); + return FloatComplexMatrix (); + } + + // Rectangular or one of the above solvers flags a singular matrix + if (singular_fallback && mattype.type () == MatrixType::Rectangular) + { + octave_idx_type rank; + retval = lssolve (b, info, rank, rcond); + } + + return retval; +} + 
+FloatComplexColumnVector +FloatComplexMatrix::solve (MatrixType &typ, const FloatColumnVector& b) const +{ + octave_idx_type info; + float rcond; + return solve (typ, FloatComplexColumnVector (b), info, rcond, 0); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (MatrixType &typ, const FloatColumnVector& b, + octave_idx_type& info) const +{ + float rcond; + return solve (typ, FloatComplexColumnVector (b), info, rcond, 0); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (MatrixType &typ, const FloatColumnVector& b, + octave_idx_type& info, float& rcond) const +{ + return solve (typ, FloatComplexColumnVector (b), info, rcond, 0); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (MatrixType &typ, const FloatColumnVector& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const +{ + return solve (typ, FloatComplexColumnVector (b), info, rcond, sing_handler); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (MatrixType &typ, const FloatComplexColumnVector& b) const +{ + octave_idx_type info; + float rcond; + return solve (typ, b, info, rcond, 0); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info) const +{ + float rcond; + return solve (typ, b, info, rcond, 0); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info, float& rcond) const +{ + return solve (typ, b, info, rcond, 0); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const +{ + + FloatComplexMatrix tmp (b); + return solve (typ, tmp, info, rcond, sing_handler).column(static_cast (0)); +} + +FloatComplexMatrix +FloatComplexMatrix::solve (const FloatMatrix& b) const +{ + octave_idx_type info; + float rcond; + return solve (b, info, rcond, 0); 
+} + +FloatComplexMatrix +FloatComplexMatrix::solve (const FloatMatrix& b, octave_idx_type& info) const +{ + float rcond; + return solve (b, info, rcond, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::solve (const FloatMatrix& b, octave_idx_type& info, float& rcond) const +{ + return solve (b, info, rcond, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::solve (const FloatMatrix& b, octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const +{ + FloatComplexMatrix tmp (b); + return solve (tmp, info, rcond, sing_handler); +} + +FloatComplexMatrix +FloatComplexMatrix::solve (const FloatComplexMatrix& b) const +{ + octave_idx_type info; + float rcond; + return solve (b, info, rcond, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::solve (const FloatComplexMatrix& b, octave_idx_type& info) const +{ + float rcond; + return solve (b, info, rcond, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::solve (const FloatComplexMatrix& b, octave_idx_type& info, float& rcond) const +{ + return solve (b, info, rcond, 0); +} + +FloatComplexMatrix +FloatComplexMatrix::solve (const FloatComplexMatrix& b, octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const +{ + MatrixType mattype (*this); + return solve (mattype, b, info, rcond, sing_handler); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (const FloatColumnVector& b) const +{ + octave_idx_type info; + float rcond; + return solve (FloatComplexColumnVector (b), info, rcond, 0); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (const FloatColumnVector& b, octave_idx_type& info) const +{ + float rcond; + return solve (FloatComplexColumnVector (b), info, rcond, 0); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (const FloatColumnVector& b, octave_idx_type& info, + float& rcond) const +{ + return solve (FloatComplexColumnVector (b), info, rcond, 0); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (const FloatColumnVector& b, 
octave_idx_type& info, + float& rcond, + solve_singularity_handler sing_handler) const +{ + return solve (FloatComplexColumnVector (b), info, rcond, sing_handler); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (const FloatComplexColumnVector& b) const +{ + octave_idx_type info; + float rcond; + return solve (b, info, rcond, 0); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (const FloatComplexColumnVector& b, octave_idx_type& info) const +{ + float rcond; + return solve (b, info, rcond, 0); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (const FloatComplexColumnVector& b, octave_idx_type& info, + float& rcond) const +{ + return solve (b, info, rcond, 0); +} + +FloatComplexColumnVector +FloatComplexMatrix::solve (const FloatComplexColumnVector& b, octave_idx_type& info, + float& rcond, + solve_singularity_handler sing_handler) const +{ + MatrixType mattype (*this); + return solve (mattype, b, info, rcond, sing_handler); +} + +FloatComplexMatrix +FloatComplexMatrix::lssolve (const FloatMatrix& b) const +{ + octave_idx_type info; + octave_idx_type rank; + float rcond; + return lssolve (FloatComplexMatrix (b), info, rank, rcond); +} + +FloatComplexMatrix +FloatComplexMatrix::lssolve (const FloatMatrix& b, octave_idx_type& info) const +{ + octave_idx_type rank; + float rcond; + return lssolve (FloatComplexMatrix (b), info, rank, rcond); +} + +FloatComplexMatrix +FloatComplexMatrix::lssolve (const FloatMatrix& b, octave_idx_type& info, + octave_idx_type& rank) const +{ + float rcond; + return lssolve (FloatComplexMatrix (b), info, rank, rcond); +} + +FloatComplexMatrix +FloatComplexMatrix::lssolve (const FloatMatrix& b, octave_idx_type& info, + octave_idx_type& rank, float& rcond) const +{ + return lssolve (FloatComplexMatrix (b), info, rank, rcond); +} + +FloatComplexMatrix +FloatComplexMatrix::lssolve (const FloatComplexMatrix& b) const +{ + octave_idx_type info; + octave_idx_type rank; + float rcond; + return lssolve (b, info, rank, 
rcond); +} + +FloatComplexMatrix +FloatComplexMatrix::lssolve (const FloatComplexMatrix& b, octave_idx_type& info) const +{ + octave_idx_type rank; + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatComplexMatrix +FloatComplexMatrix::lssolve (const FloatComplexMatrix& b, octave_idx_type& info, + octave_idx_type& rank) const +{ + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatComplexMatrix +FloatComplexMatrix::lssolve (const FloatComplexMatrix& b, octave_idx_type& info, + octave_idx_type& rank, float& rcond) const +{ + FloatComplexMatrix retval; + + octave_idx_type nrhs = b.cols (); + + octave_idx_type m = rows (); + octave_idx_type n = cols (); + + if (m != b.rows ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (m== 0 || n == 0 || b.cols () == 0) + retval = FloatComplexMatrix (n, b.cols (), FloatComplex (0.0, 0.0)); + else + { + volatile octave_idx_type minmn = (m < n ? m : n); + octave_idx_type maxmn = m > n ? m : n; + rcond = -1.0; + + if (m != n) + { + retval = FloatComplexMatrix (maxmn, nrhs); + + for (octave_idx_type j = 0; j < nrhs; j++) + for (octave_idx_type i = 0; i < m; i++) + retval.elem (i, j) = b.elem (i, j); + } + else + retval = b; + + FloatComplexMatrix atmp = *this; + FloatComplex *tmp_data = atmp.fortran_vec (); + + FloatComplex *pretval = retval.fortran_vec (); + Array s (minmn); + float *ps = s.fortran_vec (); + + // Ask ZGELSD what the dimension of WORK should be. 
+ octave_idx_type lwork = -1; + + Array work (1); + + octave_idx_type smlsiz; + F77_FUNC (xilaenv, XILAENV) (9, F77_CONST_CHAR_ARG2 ("CGELSD", 6), + F77_CONST_CHAR_ARG2 (" ", 1), + 0, 0, 0, 0, smlsiz + F77_CHAR_ARG_LEN (6) + F77_CHAR_ARG_LEN (1)); + + octave_idx_type mnthr; + F77_FUNC (xilaenv, XILAENV) (6, F77_CONST_CHAR_ARG2 ("CGELSD", 6), + F77_CONST_CHAR_ARG2 (" ", 1), + m, n, nrhs, -1, mnthr + F77_CHAR_ARG_LEN (6) + F77_CHAR_ARG_LEN (1)); + + // We compute the size of rwork and iwork because ZGELSD in + // older versions of LAPACK does not return them on a query + // call. + float dminmn = static_cast (minmn); + float dsmlsizp1 = static_cast (smlsiz+1); +#if defined (HAVE_LOG2) + float tmp = log2 (dminmn / dsmlsizp1); +#else + float tmp = log (dminmn / dsmlsizp1) / log (2.0); +#endif + octave_idx_type nlvl = static_cast (tmp) + 1; + if (nlvl < 0) + nlvl = 0; + + octave_idx_type lrwork = minmn*(10 + 2*smlsiz + 8*nlvl) + + 3*smlsiz*nrhs + (smlsiz+1)*(smlsiz+1); + if (lrwork < 1) + lrwork = 1; + Array rwork (lrwork); + float *prwork = rwork.fortran_vec (); + + octave_idx_type liwork = 3 * minmn * nlvl + 11 * minmn; + if (liwork < 1) + liwork = 1; + Array iwork (liwork); + octave_idx_type* piwork = iwork.fortran_vec (); + + F77_XFCN (cgelsd, CGELSD, (m, n, nrhs, tmp_data, m, pretval, maxmn, + ps, rcond, rank, work.fortran_vec (), + lwork, prwork, piwork, info)); + + // The workspace query is broken in at least LAPACK 3.0.0 + // through 3.1.1 when n >= mnthr. The obtuse formula below + // should provide sufficient workspace for ZGELSD to operate + // efficiently. 
+ if (n >= mnthr) + { + octave_idx_type addend = m; + + if (2*m-4 > addend) + addend = 2*m-4; + + if (nrhs > addend) + addend = nrhs; + + if (n-3*m > addend) + addend = n-3*m; + + const octave_idx_type lworkaround = 4*m + m*m + addend; + + if (std::real (work(0)) < lworkaround) + work(0) = lworkaround; + } + else if (m >= n) + { + octave_idx_type lworkaround = 2*m + m*nrhs; + + if (std::real (work(0)) < lworkaround) + work(0) = lworkaround; + } + + lwork = static_cast (std::real (work(0))); + work.resize (lwork); + + F77_XFCN (cgelsd, CGELSD, (m, n, nrhs, tmp_data, m, pretval, + maxmn, ps, rcond, rank, + work.fortran_vec (), lwork, + prwork, piwork, info)); + + if (rank < minmn) + (*current_liboctave_warning_handler) + ("zgelsd: rank deficient %dx%d matrix, rank = %d, tol = %e", + m, n, rank, rcond); + + if (s.elem (0) == 0.0) + rcond = 0.0; + else + rcond = s.elem (minmn - 1) / s.elem (0); + + retval.resize (n, nrhs); + } + + return retval; +} + +FloatComplexColumnVector +FloatComplexMatrix::lssolve (const FloatColumnVector& b) const +{ + octave_idx_type info; + octave_idx_type rank; + float rcond; + return lssolve (FloatComplexColumnVector (b), info, rank, rcond); +} + +FloatComplexColumnVector +FloatComplexMatrix::lssolve (const FloatColumnVector& b, octave_idx_type& info) const +{ + octave_idx_type rank; + float rcond; + return lssolve (FloatComplexColumnVector (b), info, rank, rcond); +} + +FloatComplexColumnVector +FloatComplexMatrix::lssolve (const FloatColumnVector& b, octave_idx_type& info, + octave_idx_type& rank) const +{ + float rcond; + return lssolve (FloatComplexColumnVector (b), info, rank, rcond); +} + +FloatComplexColumnVector +FloatComplexMatrix::lssolve (const FloatColumnVector& b, octave_idx_type& info, + octave_idx_type& rank, float& rcond) const +{ + return lssolve (FloatComplexColumnVector (b), info, rank, rcond); +} + +FloatComplexColumnVector +FloatComplexMatrix::lssolve (const FloatComplexColumnVector& b) const +{ + octave_idx_type info; 
+ octave_idx_type rank; + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatComplexColumnVector +FloatComplexMatrix::lssolve (const FloatComplexColumnVector& b, octave_idx_type& info) const +{ + octave_idx_type rank; + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatComplexColumnVector +FloatComplexMatrix::lssolve (const FloatComplexColumnVector& b, octave_idx_type& info, + octave_idx_type& rank) const +{ + float rcond; + return lssolve (b, info, rank, rcond); + +} + +FloatComplexColumnVector +FloatComplexMatrix::lssolve (const FloatComplexColumnVector& b, octave_idx_type& info, + octave_idx_type& rank, float& rcond) const +{ + FloatComplexColumnVector retval; + + octave_idx_type nrhs = 1; + + octave_idx_type m = rows (); + octave_idx_type n = cols (); + + if (m != b.length ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (m == 0 || n == 0 || b.cols () == 0) + retval = FloatComplexColumnVector (n, FloatComplex (0.0, 0.0)); + else + { + volatile octave_idx_type minmn = (m < n ? m : n); + octave_idx_type maxmn = m > n ? m : n; + rcond = -1.0; + + if (m != n) + { + retval = FloatComplexColumnVector (maxmn); + + for (octave_idx_type i = 0; i < m; i++) + retval.elem (i) = b.elem (i); + } + else + retval = b; + + FloatComplexMatrix atmp = *this; + FloatComplex *tmp_data = atmp.fortran_vec (); + + FloatComplex *pretval = retval.fortran_vec (); + Array s (minmn); + float *ps = s.fortran_vec (); + + // Ask ZGELSD what the dimension of WORK should be. + octave_idx_type lwork = -1; + + Array work (1); + + octave_idx_type smlsiz; + F77_FUNC (xilaenv, XILAENV) (9, F77_CONST_CHAR_ARG2 ("CGELSD", 6), + F77_CONST_CHAR_ARG2 (" ", 1), + 0, 0, 0, 0, smlsiz + F77_CHAR_ARG_LEN (6) + F77_CHAR_ARG_LEN (1)); + + // We compute the size of rwork and iwork because ZGELSD in + // older versions of LAPACK does not return them on a query + // call. 
+      float dminmn = static_cast<float> (minmn);
+      float dsmlsizp1 = static_cast<float> (smlsiz+1);
+#if defined (HAVE_LOG2)
+      float tmp = log2 (dminmn / dsmlsizp1);
+#else
+      float tmp = log (dminmn / dsmlsizp1) / log (2.0);
+#endif
+      octave_idx_type nlvl = static_cast<octave_idx_type> (tmp) + 1;
+      if (nlvl < 0)
+        nlvl = 0;
+
+      octave_idx_type lrwork = minmn*(10 + 2*smlsiz + 8*nlvl)
+        + 3*smlsiz*nrhs + (smlsiz+1)*(smlsiz+1);
+      if (lrwork < 1)
+        lrwork = 1;
+      Array<float> rwork (lrwork);
+      float *prwork = rwork.fortran_vec ();
+
+      octave_idx_type liwork = 3 * minmn * nlvl + 11 * minmn;
+      if (liwork < 1)
+        liwork = 1;
+      Array<octave_idx_type> iwork (liwork);
+      octave_idx_type* piwork = iwork.fortran_vec ();
+
+      F77_XFCN (cgelsd, CGELSD, (m, n, nrhs, tmp_data, m, pretval, maxmn,
+                                 ps, rcond, rank, work.fortran_vec (),
+                                 lwork, prwork, piwork, info));
+
+      lwork = static_cast<octave_idx_type> (std::real (work(0)));
+      work.resize (lwork);
+      rwork.resize (static_cast<octave_idx_type> (rwork(0)));
+      iwork.resize (iwork(0));
+
+      F77_XFCN (cgelsd, CGELSD, (m, n, nrhs, tmp_data, m, pretval,
+                                 maxmn, ps, rcond, rank,
+                                 work.fortran_vec (), lwork,
+                                 prwork, piwork, info));
+
+      if (rank < minmn)
+        (*current_liboctave_warning_handler)
+          ("cgelsd: rank deficient %dx%d matrix, rank = %d, tol = %e",
+           m, n, rank, rcond);
+
+      if (s.elem (0) == 0.0)
+        rcond = 0.0;
+      else
+        rcond = s.elem (minmn - 1) / s.elem (0);
+
+      retval.resize (n);
+    }
+
+  return retval;
+}
+
+// Constants for matrix exponential calculation.
+ +static float padec [] = +{ + 5.0000000000000000e-1, + 1.1666666666666667e-1, + 1.6666666666666667e-2, + 1.6025641025641026e-3, + 1.0683760683760684e-4, + 4.8562548562548563e-6, + 1.3875013875013875e-7, + 1.9270852604185938e-9, +}; + +static void +solve_singularity_warning (float rcond) +{ + (*current_liboctave_warning_handler) + ("singular matrix encountered in expm calculation, rcond = %g", + rcond); +} + +FloatComplexMatrix +FloatComplexMatrix::expm (void) const +{ + FloatComplexMatrix retval; + + FloatComplexMatrix m = *this; + + octave_idx_type nc = columns (); + + // Preconditioning step 1: trace normalization to reduce dynamic + // range of poles, but avoid making stable eigenvalues unstable. + + // trace shift value + FloatComplex trshift = 0.0; + + for (octave_idx_type i = 0; i < nc; i++) + trshift += m.elem (i, i); + + trshift /= nc; + + if (trshift.real () < 0.0) + { + trshift = trshift.imag (); + if (trshift.real () > 709.0) + trshift = 709.0; + } + + for (octave_idx_type i = 0; i < nc; i++) + m.elem (i, i) -= trshift; + + // Preconditioning step 2: eigenvalue balancing. + // code follows development in AEPBAL + + FloatComplex *mp = m.fortran_vec (); + + octave_idx_type info, ilo, ihi,ilos,ihis; + Array dpermute (nc); + Array dscale (nc); + + // FIXME -- should pass job as a parameter in expm + + // Permute first + char job = 'P'; + F77_XFCN (cgebal, CGEBAL, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, mp, nc, ilo, ihi, + dpermute.fortran_vec (), info + F77_CHAR_ARG_LEN (1))); + + // then scale + job = 'S'; + F77_XFCN (cgebal, CGEBAL, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, mp, nc, ilos, ihis, + dscale.fortran_vec (), info + F77_CHAR_ARG_LEN (1))); + + // Preconditioning step 3: scaling. + + FloatColumnVector work (nc); + float inf_norm; + + F77_XFCN (xclange, XCLANGE, (F77_CONST_CHAR_ARG2 ("I", 1), + nc, nc, m.fortran_vec (), nc, + work.fortran_vec (), inf_norm + F77_CHAR_ARG_LEN (1))); + + int sqpow = (inf_norm > 0.0 + ? 
static_cast (1.0 + log (inf_norm) / log (2.0)) : 0); + + // Check whether we need to square at all. + + if (sqpow < 0) + sqpow = 0; + + if (sqpow > 0) + { + if (sqpow > 1023) + sqpow = 1023; + + float scale_factor = 1.0; + for (octave_idx_type i = 0; i < sqpow; i++) + scale_factor *= 2.0; + + m = m / scale_factor; + } + + // npp, dpp: pade' approx polynomial matrices. + + FloatComplexMatrix npp (nc, nc, 0.0); + FloatComplex *pnpp = npp.fortran_vec (); + FloatComplexMatrix dpp = npp; + FloatComplex *pdpp = dpp.fortran_vec (); + + // Now powers a^8 ... a^1. + + int minus_one_j = -1; + for (octave_idx_type j = 7; j >= 0; j--) + { + for (octave_idx_type i = 0; i < nc; i++) + { + octave_idx_type k = i * nc + i; + pnpp[k] += padec[j]; + pdpp[k] += minus_one_j * padec[j]; + } + + npp = m * npp; + pnpp = npp.fortran_vec (); + + dpp = m * dpp; + pdpp = dpp.fortran_vec (); + + minus_one_j *= -1; + } + + // Zero power. + + dpp = -dpp; + for (octave_idx_type j = 0; j < nc; j++) + { + npp.elem (j, j) += 1.0; + dpp.elem (j, j) += 1.0; + } + + // Compute pade approximation = inverse (dpp) * npp. + + float rcond; + retval = dpp.solve (npp, info, rcond, solve_singularity_warning); + + if (info < 0) + return retval; + + // Reverse preconditioning step 3: repeated squaring. + + while (sqpow) + { + retval = retval * retval; + sqpow--; + } + + // Reverse preconditioning step 2: inverse balancing. 
+ // Done in two steps: inverse scaling, then inverse permutation + + // inverse scaling (diagonal transformation) + for (octave_idx_type i = 0; i < nc; i++) + for (octave_idx_type j = 0; j < nc; j++) + retval(i,j) *= dscale(i) / dscale(j); + + OCTAVE_QUIT; + + // construct balancing permutation vector + Array iperm (nc); + for (octave_idx_type i = 0; i < nc; i++) + iperm(i) = i; // initialize to identity permutation + + // leading permutations in forward order + for (octave_idx_type i = 0; i < (ilo-1); i++) + { + octave_idx_type swapidx = static_cast (dpermute(i)) - 1; + octave_idx_type tmp = iperm(i); + iperm(i) = iperm(swapidx); + iperm(swapidx) = tmp; + } + + // construct inverse balancing permutation vector + Array invpvec (nc); + for (octave_idx_type i = 0; i < nc; i++) + invpvec(iperm(i)) = i; // Thanks to R. A. Lippert for this method + + OCTAVE_QUIT; + + FloatComplexMatrix tmpMat = retval; + for (octave_idx_type i = 0; i < nc; i++) + for (octave_idx_type j = 0; j < nc; j++) + retval(i,j) = tmpMat(invpvec(i),invpvec(j)); + + OCTAVE_QUIT; + + for (octave_idx_type i = 0; i < nc; i++) + iperm(i) = i; // initialize to identity permutation + + // trailing permutations must be done in reverse order + for (octave_idx_type i = nc - 1; i >= ihi; i--) + { + octave_idx_type swapidx = static_cast (dpermute(i)) - 1; + octave_idx_type tmp = iperm(i); + iperm(i) = iperm(swapidx); + iperm(swapidx) = tmp; + } + + // construct inverse balancing permutation vector + for (octave_idx_type i = 0; i < nc; i++) + invpvec(iperm(i)) = i; // Thanks to R. A. Lippert for this method + + OCTAVE_QUIT; + + tmpMat = retval; + for (octave_idx_type i = 0; i < nc; i++) + for (octave_idx_type j = 0; j < nc; j++) + retval(i,j) = tmpMat(invpvec(i),invpvec(j)); + + // Reverse preconditioning step 1: fix trace normalization. 
+
+  return exp (trshift) * retval;
+}
+
+// column vector by row vector -> matrix operations
+
+FloatComplexMatrix
+operator * (const FloatColumnVector& v, const FloatComplexRowVector& a)
+{
+  FloatComplexColumnVector tmp (v);
+  return tmp * a;
+}
+
+FloatComplexMatrix
+operator * (const FloatComplexColumnVector& a, const FloatRowVector& b)
+{
+  FloatComplexRowVector tmp (b);
+  return a * tmp;
+}
+
+FloatComplexMatrix
+operator * (const FloatComplexColumnVector& v, const FloatComplexRowVector& a)
+{
+  FloatComplexMatrix retval;
+
+  octave_idx_type len = v.length ();
+
+  if (len != 0)
+    {
+      octave_idx_type a_len = a.length ();
+
+      retval.resize (len, a_len);
+      FloatComplex *c = retval.fortran_vec ();
+
+      F77_XFCN (cgemm, CGEMM, (F77_CONST_CHAR_ARG2 ("N", 1),
+                               F77_CONST_CHAR_ARG2 ("N", 1),
+                               len, a_len, 1, 1.0, v.data (), len,
+                               a.data (), 1, 0.0, c, len
+                               F77_CHAR_ARG_LEN (1)
+                               F77_CHAR_ARG_LEN (1)));
+    }
+
+  return retval;
+}
+
+// matrix by diagonal matrix -> matrix operations
+
+FloatComplexMatrix&
+FloatComplexMatrix::operator += (const FloatDiagMatrix& a)
+{
+  octave_idx_type nr = rows ();
+  octave_idx_type nc = cols ();
+
+  octave_idx_type a_nr = a.rows ();
+  octave_idx_type a_nc = a.cols ();
+
+  if (nr != a_nr || nc != a_nc)
+    {
+      gripe_nonconformant ("operator +=", nr, nc, a_nr, a_nc);
+      return *this;
+    }
+
+  for (octave_idx_type i = 0; i < a.length (); i++)
+    elem (i, i) += a.elem (i, i);
+
+  return *this;
+}
+
+FloatComplexMatrix&
+FloatComplexMatrix::operator -= (const FloatDiagMatrix& a)
+{
+  octave_idx_type nr = rows ();
+  octave_idx_type nc = cols ();
+
+  octave_idx_type a_nr = a.rows ();
+  octave_idx_type a_nc = a.cols ();
+
+  if (nr != a_nr || nc != a_nc)
+    {
+      gripe_nonconformant ("operator -=", nr, nc, a_nr, a_nc);
+      return *this;
+    }
+
+  for (octave_idx_type i = 0; i < a.length (); i++)
+    elem (i, i) -= a.elem (i, i);
+
+  return *this;
+}
+
+FloatComplexMatrix&
+FloatComplexMatrix::operator += (const FloatComplexDiagMatrix& a)
+{
+  octave_idx_type nr = rows ();
+  octave_idx_type nc = cols ();
+
+  octave_idx_type a_nr = a.rows ();
+  octave_idx_type a_nc = a.cols ();
+
+  if (nr != a_nr || nc != a_nc)
+    {
+      gripe_nonconformant ("operator +=", nr, nc, a_nr, a_nc);
+      return *this;
+    }
+
+  for (octave_idx_type i = 0; i < a.length (); i++)
+    elem (i, i) += a.elem (i, i);
+
+  return *this;
+}
+
+FloatComplexMatrix&
+FloatComplexMatrix::operator -= (const FloatComplexDiagMatrix& a)
+{
+  octave_idx_type nr = rows ();
+  octave_idx_type nc = cols ();
+
+  octave_idx_type a_nr = a.rows ();
+  octave_idx_type a_nc = a.cols ();
+
+  if (nr != a_nr || nc != a_nc)
+    {
+      gripe_nonconformant ("operator -=", nr, nc, a_nr, a_nc);
+      return *this;
+    }
+
+  for (octave_idx_type i = 0; i < a.length (); i++)
+    elem (i, i) -= a.elem (i, i);
+
+  return *this;
+}
+
+// matrix by matrix -> matrix operations
+
+FloatComplexMatrix&
+FloatComplexMatrix::operator += (const FloatMatrix& a)
+{
+  octave_idx_type nr = rows ();
+  octave_idx_type nc = cols ();
+
+  octave_idx_type a_nr = a.rows ();
+  octave_idx_type a_nc = a.cols ();
+
+  if (nr != a_nr || nc != a_nc)
+    {
+      gripe_nonconformant ("operator +=", nr, nc, a_nr, a_nc);
+      return *this;
+    }
+
+  if (nr == 0 || nc == 0)
+    return *this;
+
+  FloatComplex *d = fortran_vec (); // Ensures only one reference to my privates!
+
+  mx_inline_add2 (d, a.data (), length ());
+  return *this;
+}
+
+FloatComplexMatrix&
+FloatComplexMatrix::operator -= (const FloatMatrix& a)
+{
+  octave_idx_type nr = rows ();
+  octave_idx_type nc = cols ();
+
+  octave_idx_type a_nr = a.rows ();
+  octave_idx_type a_nc = a.cols ();
+
+  if (nr != a_nr || nc != a_nc)
+    {
+      gripe_nonconformant ("operator -=", nr, nc, a_nr, a_nc);
+      return *this;
+    }
+
+  if (nr == 0 || nc == 0)
+    return *this;
+
+  FloatComplex *d = fortran_vec (); // Ensures only one reference to my privates!
+
+  mx_inline_subtract2 (d, a.data (), length ());
+  return *this;
+}
+
+// unary operations
+
+boolMatrix
+FloatComplexMatrix::operator !
(void) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + boolMatrix b (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + b.elem (i, j) = elem (i, j) == static_cast (0.0); + + return b; +} + +// other operations + +FloatMatrix +FloatComplexMatrix::map (dmapper fcn) const +{ + return MArray2::map (func_ptr (fcn)); +} + +FloatComplexMatrix +FloatComplexMatrix::map (cmapper fcn) const +{ + return MArray2::map (func_ptr (fcn)); +} + +boolMatrix +FloatComplexMatrix::map (bmapper fcn) const +{ + return MArray2::map (func_ptr (fcn)); +} + +bool +FloatComplexMatrix::any_element_is_inf_or_nan (void) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + FloatComplex val = elem (i, j); + if (xisinf (val) || xisnan (val)) + return true; + } + + return false; +} + +// Return true if no elements have imaginary components. + +bool +FloatComplexMatrix::all_elements_are_real (void) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + for (octave_idx_type j = 0; j < nc; j++) + { + for (octave_idx_type i = 0; i < nr; i++) + { + float ip = std::imag (elem (i, j)); + + if (ip != 0.0 || lo_ieee_signbit (ip)) + return false; + } + } + + return true; +} + +// Return nonzero if any element of CM has a non-integer real or +// imaginary part. Also extract the largest and smallest (real or +// imaginary) values and return them in MAX_VAL and MIN_VAL. 
+ +bool +FloatComplexMatrix::all_integers (float& max_val, float& min_val) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + FloatComplex val = elem (0, 0); + + float r_val = std::real (val); + float i_val = std::imag (val); + + max_val = r_val; + min_val = r_val; + + if (i_val > max_val) + max_val = i_val; + + if (i_val < max_val) + min_val = i_val; + } + else + return false; + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + FloatComplex val = elem (i, j); + + float r_val = std::real (val); + float i_val = std::imag (val); + + if (r_val > max_val) + max_val = r_val; + + if (i_val > max_val) + max_val = i_val; + + if (r_val < min_val) + min_val = r_val; + + if (i_val < min_val) + min_val = i_val; + + if (D_NINT (r_val) != r_val || D_NINT (i_val) != i_val) + return false; + } + + return true; +} + +bool +FloatComplexMatrix::too_large_for_float (void) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + FloatComplex val = elem (i, j); + + float r_val = std::real (val); + float i_val = std::imag (val); + + if ((! (xisnan (r_val) || xisinf (r_val)) + && fabs (r_val) > FLT_MAX) + || (! (xisnan (i_val) || xisinf (i_val)) + && fabs (i_val) > FLT_MAX)) + return true; + } + + return false; +} + +// FIXME Do these really belong here? Maybe they should be +// in a base class? 
+ +boolMatrix +FloatComplexMatrix::all (int dim) const +{ + // FIXME Can't use MX_ALL_OP as need to static cast to float to the ROW + // and COL expressions + +#define ROW_EXPR \ + if (elem (i, j) == static_cast (0.0)) \ + { \ + retval.elem (i, 0) = false; \ + break; \ + } + +#define COL_EXPR \ + if (elem (i, j) == static_cast (0.0)) \ + { \ + retval.elem (0, j) = false; \ + break; \ + } + + MX_BASE_REDUCTION_OP (boolMatrix, ROW_EXPR, COL_EXPR, true, true); + +#undef ROW_EXPR +#undef COL_EXPR +} + +boolMatrix +FloatComplexMatrix::any (int dim) const +{ + // FIXME Can't use MX_ANY_OP as need to static cast to float to the ROW + // and COL expressions + +#define ROW_EXPR \ + if (elem (i, j) != static_cast (0.0)) \ + { \ + retval.elem (i, 0) = true; \ + break; \ + } + +#define COL_EXPR \ + if (elem (i, j) != static_cast (0.0)) \ + { \ + retval.elem (0, j) = true; \ + break; \ + } + + MX_BASE_REDUCTION_OP (boolMatrix, ROW_EXPR, COL_EXPR, false, false); + +#undef ROW_EXPR +#undef COL_EXPR +} + +FloatComplexMatrix +FloatComplexMatrix::cumprod (int dim) const +{ + MX_CUMULATIVE_OP (FloatComplexMatrix, FloatComplex, *=); +} + +FloatComplexMatrix +FloatComplexMatrix::cumsum (int dim) const +{ + MX_CUMULATIVE_OP (FloatComplexMatrix, FloatComplex, +=); +} + +FloatComplexMatrix +FloatComplexMatrix::prod (int dim) const +{ + MX_REDUCTION_OP (FloatComplexMatrix, *=, 1.0, 1.0); +} + +FloatComplexMatrix +FloatComplexMatrix::sum (int dim) const +{ + MX_REDUCTION_OP (FloatComplexMatrix, +=, 0.0, 0.0); +} + +FloatComplexMatrix +FloatComplexMatrix::sumsq (int dim) const +{ +#define ROW_EXPR \ + FloatComplex d = elem (i, j); \ + retval.elem (i, 0) += d * conj (d) + +#define COL_EXPR \ + FloatComplex d = elem (i, j); \ + retval.elem (0, j) += d * conj (d) + + MX_BASE_REDUCTION_OP (FloatComplexMatrix, ROW_EXPR, COL_EXPR, 0.0, 0.0); + +#undef ROW_EXPR +#undef COL_EXPR +} + +FloatMatrix FloatComplexMatrix::abs (void) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); 
+ + FloatMatrix retval (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + retval (i, j) = std::abs (elem (i, j)); + + return retval; +} + +FloatComplexMatrix +FloatComplexMatrix::diag (octave_idx_type k) const +{ + return MArray2::diag (k); +} + +bool +FloatComplexMatrix::row_is_real_only (octave_idx_type i) const +{ + bool retval = true; + + octave_idx_type nc = columns (); + + for (octave_idx_type j = 0; j < nc; j++) + { + if (std::imag (elem (i, j)) != 0.0) + { + retval = false; + break; + } + } + + return retval; +} + +bool +FloatComplexMatrix::column_is_real_only (octave_idx_type j) const +{ + bool retval = true; + + octave_idx_type nr = rows (); + + for (octave_idx_type i = 0; i < nr; i++) + { + if (std::imag (elem (i, j)) != 0.0) + { + retval = false; + break; + } + } + + return retval; +} + +FloatComplexColumnVector +FloatComplexMatrix::row_min (void) const +{ + Array dummy_idx; + return row_min (dummy_idx); +} + +FloatComplexColumnVector +FloatComplexMatrix::row_min (Array& idx_arg) const +{ + FloatComplexColumnVector result; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + result.resize (nr); + idx_arg.resize (nr); + + for (octave_idx_type i = 0; i < nr; i++) + { + bool real_only = row_is_real_only (i); + + octave_idx_type idx_j; + + FloatComplex tmp_min; + + float abs_min = octave_Float_NaN; + + for (idx_j = 0; idx_j < nc; idx_j++) + { + tmp_min = elem (i, idx_j); + + if (! xisnan (tmp_min)) + { + abs_min = real_only ? std::real (tmp_min) : std::abs (tmp_min); + break; + } + } + + for (octave_idx_type j = idx_j+1; j < nc; j++) + { + FloatComplex tmp = elem (i, j); + + if (xisnan (tmp)) + continue; + + float abs_tmp = real_only ? 
std::real (tmp) : std::abs (tmp); + + if (abs_tmp < abs_min) + { + idx_j = j; + tmp_min = tmp; + abs_min = abs_tmp; + } + } + + if (xisnan (tmp_min)) + { + result.elem (i) = FloatComplex_NaN_result; + idx_arg.elem (i) = 0; + } + else + { + result.elem (i) = tmp_min; + idx_arg.elem (i) = idx_j; + } + } + } + + return result; +} + +FloatComplexColumnVector +FloatComplexMatrix::row_max (void) const +{ + Array dummy_idx; + return row_max (dummy_idx); +} + +FloatComplexColumnVector +FloatComplexMatrix::row_max (Array& idx_arg) const +{ + FloatComplexColumnVector result; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + result.resize (nr); + idx_arg.resize (nr); + + for (octave_idx_type i = 0; i < nr; i++) + { + bool real_only = row_is_real_only (i); + + octave_idx_type idx_j; + + FloatComplex tmp_max; + + float abs_max = octave_Float_NaN; + + for (idx_j = 0; idx_j < nc; idx_j++) + { + tmp_max = elem (i, idx_j); + + if (! xisnan (tmp_max)) + { + abs_max = real_only ? std::real (tmp_max) : std::abs (tmp_max); + break; + } + } + + for (octave_idx_type j = idx_j+1; j < nc; j++) + { + FloatComplex tmp = elem (i, j); + + if (xisnan (tmp)) + continue; + + float abs_tmp = real_only ? 
std::real (tmp) : std::abs (tmp); + + if (abs_tmp > abs_max) + { + idx_j = j; + tmp_max = tmp; + abs_max = abs_tmp; + } + } + + if (xisnan (tmp_max)) + { + result.elem (i) = FloatComplex_NaN_result; + idx_arg.elem (i) = 0; + } + else + { + result.elem (i) = tmp_max; + idx_arg.elem (i) = idx_j; + } + } + } + + return result; +} + +FloatComplexRowVector +FloatComplexMatrix::column_min (void) const +{ + Array dummy_idx; + return column_min (dummy_idx); +} + +FloatComplexRowVector +FloatComplexMatrix::column_min (Array& idx_arg) const +{ + FloatComplexRowVector result; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + result.resize (nc); + idx_arg.resize (nc); + + for (octave_idx_type j = 0; j < nc; j++) + { + bool real_only = column_is_real_only (j); + + octave_idx_type idx_i; + + FloatComplex tmp_min; + + float abs_min = octave_Float_NaN; + + for (idx_i = 0; idx_i < nr; idx_i++) + { + tmp_min = elem (idx_i, j); + + if (! xisnan (tmp_min)) + { + abs_min = real_only ? std::real (tmp_min) : std::abs (tmp_min); + break; + } + } + + for (octave_idx_type i = idx_i+1; i < nr; i++) + { + FloatComplex tmp = elem (i, j); + + if (xisnan (tmp)) + continue; + + float abs_tmp = real_only ? 
std::real (tmp) : std::abs (tmp); + + if (abs_tmp < abs_min) + { + idx_i = i; + tmp_min = tmp; + abs_min = abs_tmp; + } + } + + if (xisnan (tmp_min)) + { + result.elem (j) = FloatComplex_NaN_result; + idx_arg.elem (j) = 0; + } + else + { + result.elem (j) = tmp_min; + idx_arg.elem (j) = idx_i; + } + } + } + + return result; +} + +FloatComplexRowVector +FloatComplexMatrix::column_max (void) const +{ + Array dummy_idx; + return column_max (dummy_idx); +} + +FloatComplexRowVector +FloatComplexMatrix::column_max (Array& idx_arg) const +{ + FloatComplexRowVector result; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + result.resize (nc); + idx_arg.resize (nc); + + for (octave_idx_type j = 0; j < nc; j++) + { + bool real_only = column_is_real_only (j); + + octave_idx_type idx_i; + + FloatComplex tmp_max; + + float abs_max = octave_Float_NaN; + + for (idx_i = 0; idx_i < nr; idx_i++) + { + tmp_max = elem (idx_i, j); + + if (! xisnan (tmp_max)) + { + abs_max = real_only ? std::real (tmp_max) : std::abs (tmp_max); + break; + } + } + + for (octave_idx_type i = idx_i+1; i < nr; i++) + { + FloatComplex tmp = elem (i, j); + + if (xisnan (tmp)) + continue; + + float abs_tmp = real_only ? 
std::real (tmp) : std::abs (tmp); + + if (abs_tmp > abs_max) + { + idx_i = i; + tmp_max = tmp; + abs_max = abs_tmp; + } + } + + if (xisnan (tmp_max)) + { + result.elem (j) = FloatComplex_NaN_result; + idx_arg.elem (j) = 0; + } + else + { + result.elem (j) = tmp_max; + idx_arg.elem (j) = idx_i; + } + } + } + + return result; +} + +// i/o + +std::ostream& +operator << (std::ostream& os, const FloatComplexMatrix& a) +{ + for (octave_idx_type i = 0; i < a.rows (); i++) + { + for (octave_idx_type j = 0; j < a.cols (); j++) + { + os << " "; + octave_write_complex (os, a.elem (i, j)); + } + os << "\n"; + } + return os; +} + +std::istream& +operator >> (std::istream& is, FloatComplexMatrix& a) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + if (nr < 1 || nc < 1) + is.clear (std::ios::badbit); + else + { + FloatComplex tmp; + for (octave_idx_type i = 0; i < nr; i++) + for (octave_idx_type j = 0; j < nc; j++) + { + tmp = octave_read_complex (is); + if (is) + a.elem (i, j) = tmp; + else + goto done; + } + } + +done: + + return is; +} + +FloatComplexMatrix +Givens (const FloatComplex& x, const FloatComplex& y) +{ + float cc; + FloatComplex cs, temp_r; + + F77_FUNC (clartg, CLARTG) (x, y, cc, cs, temp_r); + + FloatComplexMatrix g (2, 2); + + g.elem (0, 0) = cc; + g.elem (1, 1) = cc; + g.elem (0, 1) = cs; + g.elem (1, 0) = -conj (cs); + + return g; +} + +FloatComplexMatrix +Sylvester (const FloatComplexMatrix& a, const FloatComplexMatrix& b, + const FloatComplexMatrix& c) +{ + FloatComplexMatrix retval; + + // FIXME -- need to check that a, b, and c are all the same + // size. + + // Compute Schur decompositions + + FloatComplexSCHUR as (a, "U"); + FloatComplexSCHUR bs (b, "U"); + + // Transform c to new coordinates. 
+ + FloatComplexMatrix ua = as.unitary_matrix (); + FloatComplexMatrix sch_a = as.schur_matrix (); + + FloatComplexMatrix ub = bs.unitary_matrix (); + FloatComplexMatrix sch_b = bs.schur_matrix (); + + FloatComplexMatrix cx = ua.hermitian () * c * ub; + + // Solve the sylvester equation, back-transform, and return the + // solution. + + octave_idx_type a_nr = a.rows (); + octave_idx_type b_nr = b.rows (); + + float scale; + octave_idx_type info; + + FloatComplex *pa = sch_a.fortran_vec (); + FloatComplex *pb = sch_b.fortran_vec (); + FloatComplex *px = cx.fortran_vec (); + + F77_XFCN (ctrsyl, CTRSYL, (F77_CONST_CHAR_ARG2 ("N", 1), + F77_CONST_CHAR_ARG2 ("N", 1), + 1, a_nr, b_nr, pa, a_nr, pb, + b_nr, px, a_nr, scale, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + // FIXME -- check info? + + retval = -ua * cx * ub.hermitian (); + + return retval; +} + +FloatComplexMatrix +operator * (const FloatComplexMatrix& m, const FloatMatrix& a) +{ + FloatComplexMatrix tmp (a); + return m * tmp; +} + +FloatComplexMatrix +operator * (const FloatMatrix& m, const FloatComplexMatrix& a) +{ + FloatComplexMatrix tmp (m); + return tmp * a; +} + +/* Simple Dot Product, Matrix-Vector and Matrix-Matrix Unit tests +%!assert([1+i 2+i 3+i] * [ 4+i ; 5+i ; 6+i], 29+21i, 1e-14) +%!assert([1+i 2+i ; 3+i 4+i ] * [5+i ; 6+i], [15 + 14i ; 37 + 18i], 1e-14) +%!assert([1+i 2+i ; 3+i 4+i ] * [5+i 6+i ; 7+i 8+i], [17 + 15i 20 + 17i; 41 + 19i 48 + 21i], 1e-14) +*/ + +/* Test some simple identities +%!shared M, cv, rv +%! M = randn(10,10)+i*rand(10,10); +%! cv = randn(10,1)+i*rand(10,1); +%! 
rv = randn(1,10)+i*rand(1,10); +%!assert([M*cv,M*cv],M*[cv,cv],1e-14) +%!assert([rv*M;rv*M],[rv;rv]*M,1e-14) +%!assert(2*rv*cv,[rv,rv]*[cv;cv],1e-14) +*/ + +FloatComplexMatrix +operator * (const FloatComplexMatrix& m, const FloatComplexMatrix& a) +{ + FloatComplexMatrix retval; + + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.cols (); + + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (nc != a_nr) + gripe_nonconformant ("operator *", nr, nc, a_nr, a_nc); + else + { + if (nr == 0 || nc == 0 || a_nc == 0) + retval.resize (nr, a_nc, 0.0); + else + { + octave_idx_type ld = nr; + octave_idx_type lda = a.rows (); + + retval.resize (nr, a_nc); + FloatComplex *c = retval.fortran_vec (); + + if (a_nc == 1) + { + if (nr == 1) + F77_FUNC (xcdotu, XCDOTU) (nc, m.data (), 1, a.data (), 1, *c); + else + { + F77_XFCN (cgemv, CGEMV, (F77_CONST_CHAR_ARG2 ("N", 1), + nr, nc, 1.0, m.data (), ld, + a.data (), 1, 0.0, c, 1 + F77_CHAR_ARG_LEN (1))); + } + } + else + { + F77_XFCN (cgemm, CGEMM, (F77_CONST_CHAR_ARG2 ("N", 1), + F77_CONST_CHAR_ARG2 ("N", 1), + nr, a_nc, nc, 1.0, m.data (), + ld, a.data (), lda, 0.0, c, nr + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + } + } + } + + return retval; +} + +// FIXME -- it would be nice to share code among the min/max +// functions below. 
+ +#define EMPTY_RETURN_CHECK(T) \ + if (nr == 0 || nc == 0) \ + return T (nr, nc); + +FloatComplexMatrix +min (const FloatComplex& c, const FloatComplexMatrix& m) +{ + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.columns (); + + EMPTY_RETURN_CHECK (FloatComplexMatrix); + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmin (c, m (i, j)); + } + + return result; +} + +FloatComplexMatrix +min (const FloatComplexMatrix& m, const FloatComplex& c) +{ + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.columns (); + + EMPTY_RETURN_CHECK (FloatComplexMatrix); + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmin (m (i, j), c); + } + + return result; +} + +FloatComplexMatrix +min (const FloatComplexMatrix& a, const FloatComplexMatrix& b) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.columns (); + + if (nr != b.rows () || nc != b.columns ()) + { + (*current_liboctave_error_handler) + ("two-arg min expecting args of same size"); + return FloatComplexMatrix (); + } + + EMPTY_RETURN_CHECK (FloatComplexMatrix); + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + { + int columns_are_real_only = 1; + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + if (std::imag (a (i, j)) != 0.0 || std::imag (b (i, j)) != 0.0) + { + columns_are_real_only = 0; + break; + } + } + + if (columns_are_real_only) + { + for (octave_idx_type i = 0; i < nr; i++) + result (i, j) = xmin (std::real (a (i, j)), std::real (b (i, j))); + } + else + { + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmin (a (i, j), b (i, j)); + } + } + } + + return result; +} + +FloatComplexMatrix +max (const FloatComplex& c, const FloatComplexMatrix& m) +{ + octave_idx_type nr = m.rows (); 
+ octave_idx_type nc = m.columns (); + + EMPTY_RETURN_CHECK (FloatComplexMatrix); + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmax (c, m (i, j)); + } + + return result; +} + +FloatComplexMatrix +max (const FloatComplexMatrix& m, const FloatComplex& c) +{ + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.columns (); + + EMPTY_RETURN_CHECK (FloatComplexMatrix); + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmax (m (i, j), c); + } + + return result; +} + +FloatComplexMatrix +max (const FloatComplexMatrix& a, const FloatComplexMatrix& b) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.columns (); + + if (nr != b.rows () || nc != b.columns ()) + { + (*current_liboctave_error_handler) + ("two-arg max expecting args of same size"); + return FloatComplexMatrix (); + } + + EMPTY_RETURN_CHECK (FloatComplexMatrix); + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + { + int columns_are_real_only = 1; + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + if (std::imag (a (i, j)) != 0.0 || std::imag (b (i, j)) != 0.0) + { + columns_are_real_only = 0; + break; + } + } + + if (columns_are_real_only) + { + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmax (std::real (a (i, j)), std::real (b (i, j))); + } + } + else + { + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmax (a (i, j), b (i, j)); + } + } + } + + return result; +} + +MS_CMP_OPS(FloatComplexMatrix, std::real, FloatComplex, std::real) +MS_BOOL_OPS(FloatComplexMatrix, FloatComplex, static_cast (0.0)) + +SM_CMP_OPS(FloatComplex, std::real, FloatComplexMatrix, std::real) +SM_BOOL_OPS(FloatComplex, FloatComplexMatrix, static_cast (0.0)) + 
+MM_CMP_OPS(FloatComplexMatrix, std::real, FloatComplexMatrix, std::real) +MM_BOOL_OPS(FloatComplexMatrix, FloatComplexMatrix, static_cast (0.0)) + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCMatrix.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCMatrix.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,417 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2003, + 2004, 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#if !defined (octave_FloatComplexMatrix_h) +#define octave_FloatComplexMatrix_h 1 + +#include "MArray2.h" +#include "MDiagArray2.h" +#include "MatrixType.h" + +#include "mx-defs.h" +#include "mx-op-defs.h" +#include "oct-cmplx.h" + +class +OCTAVE_API +FloatComplexMatrix : public MArray2 +{ +public: + + typedef void (*solve_singularity_handler) (float rcond); + + FloatComplexMatrix (void) : MArray2 () { } + + FloatComplexMatrix (octave_idx_type r, octave_idx_type c) : MArray2 (r, c) { } + + FloatComplexMatrix (octave_idx_type r, octave_idx_type c, const FloatComplex& val) + : MArray2 (r, c, val) { } + + FloatComplexMatrix (const dim_vector& dv) : MArray2 (dv) { } + + FloatComplexMatrix (const dim_vector& dv, const FloatComplex& val) + : MArray2 (dv, val) { } + + FloatComplexMatrix (const FloatComplexMatrix& a) : MArray2 (a) { } + + template + FloatComplexMatrix (const MArray2& a) : MArray2 (a) { } + + template + FloatComplexMatrix (const Array2& a) : MArray2 (a) { } + + explicit FloatComplexMatrix (const FloatMatrix& a); + + explicit FloatComplexMatrix (const FloatRowVector& rv); + + explicit FloatComplexMatrix (const FloatColumnVector& cv); + + explicit FloatComplexMatrix (const FloatDiagMatrix& a); + + explicit FloatComplexMatrix (const FloatComplexRowVector& rv); + + explicit FloatComplexMatrix (const FloatComplexColumnVector& cv); + + explicit FloatComplexMatrix (const FloatComplexDiagMatrix& a); + + explicit FloatComplexMatrix (const boolMatrix& a); + + explicit FloatComplexMatrix (const charMatrix& a); + + FloatComplexMatrix& operator = (const FloatComplexMatrix& a) + { + MArray2::operator = (a); + return *this; + } + + bool operator == (const FloatComplexMatrix& a) const; + bool operator != (const FloatComplexMatrix& a) const; + + bool is_hermitian (void) const; + + // destructive insert/delete/reorder operations + + FloatComplexMatrix& insert (const FloatMatrix& a, octave_idx_type r, octave_idx_type c); + FloatComplexMatrix& insert (const 
FloatRowVector& a, octave_idx_type r, octave_idx_type c); + FloatComplexMatrix& insert (const FloatColumnVector& a, octave_idx_type r, octave_idx_type c); + FloatComplexMatrix& insert (const FloatDiagMatrix& a, octave_idx_type r, octave_idx_type c); + + FloatComplexMatrix& insert (const FloatComplexMatrix& a, octave_idx_type r, octave_idx_type c); + FloatComplexMatrix& insert (const FloatComplexRowVector& a, octave_idx_type r, octave_idx_type c); + FloatComplexMatrix& insert (const FloatComplexColumnVector& a, octave_idx_type r, octave_idx_type c); + FloatComplexMatrix& insert (const FloatComplexDiagMatrix& a, octave_idx_type r, octave_idx_type c); + + FloatComplexMatrix& fill (float val); + FloatComplexMatrix& fill (const FloatComplex& val); + FloatComplexMatrix& fill (float val, octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2); + FloatComplexMatrix& fill (const FloatComplex& val, octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2); + + FloatComplexMatrix append (const FloatMatrix& a) const; + FloatComplexMatrix append (const FloatRowVector& a) const; + FloatComplexMatrix append (const FloatColumnVector& a) const; + FloatComplexMatrix append (const FloatDiagMatrix& a) const; + + FloatComplexMatrix append (const FloatComplexMatrix& a) const; + FloatComplexMatrix append (const FloatComplexRowVector& a) const; + FloatComplexMatrix append (const FloatComplexColumnVector& a) const; + FloatComplexMatrix append (const FloatComplexDiagMatrix& a) const; + + FloatComplexMatrix stack (const FloatMatrix& a) const; + FloatComplexMatrix stack (const FloatRowVector& a) const; + FloatComplexMatrix stack (const FloatColumnVector& a) const; + FloatComplexMatrix stack (const FloatDiagMatrix& a) const; + + FloatComplexMatrix stack (const FloatComplexMatrix& a) const; + FloatComplexMatrix stack (const FloatComplexRowVector& a) const; + FloatComplexMatrix stack (const FloatComplexColumnVector& a) const; + FloatComplexMatrix 
stack (const FloatComplexDiagMatrix& a) const; + + FloatComplexMatrix hermitian (void) const + { return MArray2::hermitian (std::conj); } + FloatComplexMatrix transpose (void) const + { return MArray2::transpose (); } + + friend FloatComplexMatrix conj (const FloatComplexMatrix& a); + + // resize is the destructive equivalent for this one + + FloatComplexMatrix extract (octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) const; + + FloatComplexMatrix extract_n (octave_idx_type r1, octave_idx_type c1, octave_idx_type nr, octave_idx_type nc) const; + + // extract row or column i. + + FloatComplexRowVector row (octave_idx_type i) const; + + FloatComplexColumnVector column (octave_idx_type i) const; + +private: + FloatComplexMatrix tinverse (MatrixType &mattype, octave_idx_type& info, + float& rcond, int force, int calc_cond) const; + + FloatComplexMatrix finverse (MatrixType &mattype, octave_idx_type& info, + float& rcond, int force, int calc_cond) const; + +public: + FloatComplexMatrix inverse (void) const; + FloatComplexMatrix inverse (octave_idx_type& info) const; + FloatComplexMatrix inverse (octave_idx_type& info, float& rcond, int force = 0, + int calc_cond = 1) const; + + FloatComplexMatrix inverse (MatrixType &mattype) const; + FloatComplexMatrix inverse (MatrixType &mattype, octave_idx_type& info) const; + FloatComplexMatrix inverse (MatrixType &mattype, octave_idx_type& info, + float& rcond, int force = 0, + int calc_cond = 1) const; + + FloatComplexMatrix pseudo_inverse (float tol = 0.0) const; + + FloatComplexMatrix fourier (void) const; + FloatComplexMatrix ifourier (void) const; + + FloatComplexMatrix fourier2d (void) const; + FloatComplexMatrix ifourier2d (void) const; + + FloatComplexDET determinant (void) const; + FloatComplexDET determinant (octave_idx_type& info) const; + FloatComplexDET determinant (octave_idx_type& info, float& rcond, int calc_cond = 1) const; + +private: + // Upper triangular matrix solvers + 
FloatComplexMatrix utsolve (MatrixType &typ, const FloatComplexMatrix& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler, + bool calc_cond = false) const; + + // Lower triangular matrix solvers + FloatComplexMatrix ltsolve (MatrixType &typ, const FloatComplexMatrix& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler, + bool calc_cond = false) const; + + // Full matrix solvers (umfpack/cholesky) + FloatComplexMatrix fsolve (MatrixType &typ, const FloatComplexMatrix& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler, + bool calc_cond = false) const; + +public: + // Generic interface to solver with no probing of type + FloatComplexMatrix solve (MatrixType &typ, const FloatMatrix& b) const; + FloatComplexMatrix solve (MatrixType &typ, const FloatMatrix& b, + octave_idx_type& info) const; + FloatComplexMatrix solve (MatrixType &typ, const FloatMatrix& b, + octave_idx_type& info, float& rcond) const; + FloatComplexMatrix solve (MatrixType &typ, const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool singular_fallback = true) const; + + FloatComplexMatrix solve (MatrixType &typ, const FloatComplexMatrix& b) const; + FloatComplexMatrix solve (MatrixType &typ, const FloatComplexMatrix& b, + octave_idx_type& info) const; + FloatComplexMatrix solve (MatrixType &typ, const FloatComplexMatrix& b, + octave_idx_type& info, float& rcond) const; + FloatComplexMatrix solve (MatrixType &typ, const FloatComplexMatrix& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler, + bool singular_fallback = true) const; + + FloatComplexColumnVector solve (MatrixType &typ, const FloatColumnVector& b) const; + FloatComplexColumnVector solve (MatrixType &typ, const FloatColumnVector& b, + octave_idx_type& info) const; + FloatComplexColumnVector solve (MatrixType &typ, const FloatColumnVector& b, + octave_idx_type& info, 
float& rcond) const; + FloatComplexColumnVector solve (MatrixType &typ, const FloatColumnVector& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const; + + FloatComplexColumnVector solve (MatrixType &typ, + const FloatComplexColumnVector& b) const; + FloatComplexColumnVector solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info) const; + FloatComplexColumnVector solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info, float& rcond) const; + FloatComplexColumnVector solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const; + + // Generic interface to solver with probing of type + FloatComplexMatrix solve (const FloatMatrix& b) const; + FloatComplexMatrix solve (const FloatMatrix& b, octave_idx_type& info) const; + FloatComplexMatrix solve (const FloatMatrix& b, octave_idx_type& info, float& rcond) const; + FloatComplexMatrix solve (const FloatMatrix& b, octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const; + + FloatComplexMatrix solve (const FloatComplexMatrix& b) const; + FloatComplexMatrix solve (const FloatComplexMatrix& b, octave_idx_type& info) const; + FloatComplexMatrix solve (const FloatComplexMatrix& b, octave_idx_type& info, float& rcond) const; + FloatComplexMatrix solve (const FloatComplexMatrix& b, octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const; + + FloatComplexColumnVector solve (const FloatColumnVector& b) const; + FloatComplexColumnVector solve (const FloatColumnVector& b, octave_idx_type& info) const; + FloatComplexColumnVector solve (const FloatColumnVector& b, octave_idx_type& info, + float& rcond) const; + FloatComplexColumnVector solve (const FloatColumnVector& b, octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const; + + FloatComplexColumnVector solve (const 
FloatComplexColumnVector& b) const; + FloatComplexColumnVector solve (const FloatComplexColumnVector& b, octave_idx_type& info) const; + FloatComplexColumnVector solve (const FloatComplexColumnVector& b, octave_idx_type& info, + float& rcond) const; + FloatComplexColumnVector solve (const FloatComplexColumnVector& b, octave_idx_type& info, + float& rcond, + solve_singularity_handler sing_handler) const; + + FloatComplexMatrix lssolve (const FloatMatrix& b) const; + FloatComplexMatrix lssolve (const FloatMatrix& b, octave_idx_type& info) const; + FloatComplexMatrix lssolve (const FloatMatrix& b, octave_idx_type& info, + octave_idx_type& rank) const; + FloatComplexMatrix lssolve (const FloatMatrix& b, octave_idx_type& info, + octave_idx_type& rank, float& rcond) const; + + FloatComplexMatrix lssolve (const FloatComplexMatrix& b) const; + FloatComplexMatrix lssolve (const FloatComplexMatrix& b, octave_idx_type& info) const; + FloatComplexMatrix lssolve (const FloatComplexMatrix& b, octave_idx_type& info, + octave_idx_type& rank) const; + FloatComplexMatrix lssolve (const FloatComplexMatrix& b, octave_idx_type& info, + octave_idx_type& rank, float& rcond) const; + + FloatComplexColumnVector lssolve (const FloatColumnVector& b) const; + FloatComplexColumnVector lssolve (const FloatColumnVector& b, + octave_idx_type& info) const; + FloatComplexColumnVector lssolve (const FloatColumnVector& b, octave_idx_type& info, + octave_idx_type& rank) const; + FloatComplexColumnVector lssolve (const FloatColumnVector& b, octave_idx_type& info, + octave_idx_type& rank, float& rcond) const; + + FloatComplexColumnVector lssolve (const FloatComplexColumnVector& b) const; + FloatComplexColumnVector lssolve (const FloatComplexColumnVector& b, + octave_idx_type& info) const; + FloatComplexColumnVector lssolve (const FloatComplexColumnVector& b, + octave_idx_type& info, + octave_idx_type& rank) const; + FloatComplexColumnVector lssolve (const FloatComplexColumnVector& b, + octave_idx_type& 
info, + octave_idx_type& rank, float& rcond) const; + + FloatComplexMatrix expm (void) const; + + // matrix by diagonal matrix -> matrix operations + + FloatComplexMatrix& operator += (const FloatDiagMatrix& a); + FloatComplexMatrix& operator -= (const FloatDiagMatrix& a); + + FloatComplexMatrix& operator += (const FloatComplexDiagMatrix& a); + FloatComplexMatrix& operator -= (const FloatComplexDiagMatrix& a); + + // matrix by matrix -> matrix operations + + FloatComplexMatrix& operator += (const FloatMatrix& a); + FloatComplexMatrix& operator -= (const FloatMatrix& a); + + // unary operations + + boolMatrix operator ! (void) const; + + // other operations + + typedef float (*dmapper) (const FloatComplex&); + typedef FloatComplex (*cmapper) (const FloatComplex&); + typedef bool (*bmapper) (const FloatComplex&); + + FloatMatrix map (dmapper fcn) const; + FloatComplexMatrix map (cmapper fcn) const; + boolMatrix map (bmapper fcn) const; + + bool any_element_is_inf_or_nan (void) const; + bool all_elements_are_real (void) const; + bool all_integers (float& max_val, float& min_val) const; + bool too_large_for_float (void) const; + + boolMatrix all (int dim = -1) const; + boolMatrix any (int dim = -1) const; + + FloatComplexMatrix cumprod (int dim = -1) const; + FloatComplexMatrix cumsum (int dim = -1) const; + FloatComplexMatrix prod (int dim = -1) const; + FloatComplexMatrix sum (int dim = -1) const; + FloatComplexMatrix sumsq (int dim = -1) const; + FloatMatrix abs (void) const; + + FloatComplexMatrix diag (octave_idx_type k = 0) const; + + bool row_is_real_only (octave_idx_type) const; + bool column_is_real_only (octave_idx_type) const; + + FloatComplexColumnVector row_min (void) const; + FloatComplexColumnVector row_max (void) const; + + FloatComplexColumnVector row_min (Array& index) const; + FloatComplexColumnVector row_max (Array& index) const; + + FloatComplexRowVector column_min (void) const; + FloatComplexRowVector column_max (void) const; + + 
FloatComplexRowVector column_min (Array& index) const; + FloatComplexRowVector column_max (Array& index) const; + + // i/o + + friend OCTAVE_API std::ostream& operator << (std::ostream& os, const FloatComplexMatrix& a); + friend OCTAVE_API std::istream& operator >> (std::istream& is, FloatComplexMatrix& a); + + static FloatComplex resize_fill_value (void) { return FloatComplex (0.0, 0.0); } + +private: + + FloatComplexMatrix (FloatComplex *d, octave_idx_type r, octave_idx_type c) : MArray2 (d, r, c) { } +}; + +// column vector by row vector -> matrix operations + +extern OCTAVE_API FloatComplexMatrix +operator * (const FloatColumnVector& a, const FloatComplexRowVector& b); + +extern OCTAVE_API FloatComplexMatrix +operator * (const FloatComplexColumnVector& a, const FloatRowVector& b); + +extern OCTAVE_API FloatComplexMatrix +operator * (const FloatComplexColumnVector& a, const FloatComplexRowVector& b); + +extern OCTAVE_API FloatComplexMatrix +Givens (const FloatComplex&, const FloatComplex&); + +extern OCTAVE_API FloatComplexMatrix +Sylvester (const FloatComplexMatrix&, const FloatComplexMatrix&, const FloatComplexMatrix&); + +extern OCTAVE_API FloatComplexMatrix operator * (const FloatMatrix&, const FloatComplexMatrix&); +extern OCTAVE_API FloatComplexMatrix operator * (const FloatComplexMatrix&, const FloatMatrix&); +extern OCTAVE_API FloatComplexMatrix operator * (const FloatComplexMatrix&, const FloatComplexMatrix&); + +extern OCTAVE_API FloatComplexMatrix min (const FloatComplex& c, const FloatComplexMatrix& m); +extern OCTAVE_API FloatComplexMatrix min (const FloatComplexMatrix& m, const FloatComplex& c); +extern OCTAVE_API FloatComplexMatrix min (const FloatComplexMatrix& a, const FloatComplexMatrix& b); + +extern OCTAVE_API FloatComplexMatrix max (const FloatComplex& c, const FloatComplexMatrix& m); +extern OCTAVE_API FloatComplexMatrix max (const FloatComplexMatrix& m, const FloatComplex& c); +extern OCTAVE_API FloatComplexMatrix max (const 
FloatComplexMatrix& a, const FloatComplexMatrix& b); + +MS_CMP_OP_DECLS (FloatComplexMatrix, FloatComplex, OCTAVE_API) +MS_BOOL_OP_DECLS (FloatComplexMatrix, FloatComplex, OCTAVE_API) + +SM_CMP_OP_DECLS (FloatComplex, FloatComplexMatrix, OCTAVE_API) +SM_BOOL_OP_DECLS (FloatComplex, FloatComplexMatrix, OCTAVE_API) + +MM_CMP_OP_DECLS (FloatComplexMatrix, FloatComplexMatrix, OCTAVE_API) +MM_BOOL_OP_DECLS (FloatComplexMatrix, FloatComplexMatrix, OCTAVE_API) + +MARRAY_FORWARD_DEFS (MArray2, FloatComplexMatrix, FloatComplex) + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCNDArray.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCNDArray.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,1200 @@ +// N-D Array manipulations. +/* + +Copyright (C) 1996, 1997, 2003, 2004, 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include + +#include "Array-util.h" +#include "fCNDArray.h" +#include "mx-base.h" +#include "f77-fcn.h" +#include "functor.h" +#include "lo-ieee.h" +#include "lo-mappers.h" + +#if defined (HAVE_FFTW3) +#include "oct-fftw.h" +#else +extern "C" +{ + F77_RET_T + F77_FUNC (cffti, CFFTI) (const octave_idx_type&, FloatComplex*); + + F77_RET_T + F77_FUNC (cfftf, CFFTF) (const octave_idx_type&, FloatComplex*, FloatComplex*); + + F77_RET_T + F77_FUNC (cfftb, CFFTB) (const octave_idx_type&, FloatComplex*, FloatComplex*); +} +#endif + +#if defined (HAVE_FFTW3) +FloatComplexNDArray +FloatComplexNDArray::fourier (int dim) const +{ + dim_vector dv = dims (); + + if (dim > dv.length () || dim < 0) + return FloatComplexNDArray (); + + octave_idx_type stride = 1; + octave_idx_type n = dv(dim); + + for (int i = 0; i < dim; i++) + stride *= dv(i); + + octave_idx_type howmany = numel () / dv (dim); + howmany = (stride == 1 ? howmany : (howmany > stride ? stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 1 : numel () / dv (dim) / stride); + octave_idx_type dist = (stride == 1 ? n : 1); + + const FloatComplex *in (fortran_vec ()); + FloatComplexNDArray retval (dv); + FloatComplex *out (retval.fortran_vec ()); + + // Need to be careful here about the distance between fft's + for (octave_idx_type k = 0; k < nloop; k++) + octave_fftw::fft (in + k * stride * n, out + k * stride * n, + n, howmany, stride, dist); + + return retval; +} + +FloatComplexNDArray +FloatComplexNDArray::ifourier (int dim) const +{ + dim_vector dv = dims (); + + if (dim > dv.length () || dim < 0) + return FloatComplexNDArray (); + + octave_idx_type stride = 1; + octave_idx_type n = dv(dim); + + for (int i = 0; i < dim; i++) + stride *= dv(i); + + octave_idx_type howmany = numel () / dv (dim); + howmany = (stride == 1 ? howmany : (howmany > stride ? stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 
1 : numel () / dv (dim) / stride); + octave_idx_type dist = (stride == 1 ? n : 1); + + const FloatComplex *in (fortran_vec ()); + FloatComplexNDArray retval (dv); + FloatComplex *out (retval.fortran_vec ()); + + // Need to be careful here about the distance between fft's + for (octave_idx_type k = 0; k < nloop; k++) + octave_fftw::ifft (in + k * stride * n, out + k * stride * n, + n, howmany, stride, dist); + + return retval; +} + +FloatComplexNDArray +FloatComplexNDArray::fourier2d (void) const +{ + dim_vector dv = dims(); + if (dv.length () < 2) + return FloatComplexNDArray (); + + dim_vector dv2(dv(0), dv(1)); + const FloatComplex *in = fortran_vec (); + FloatComplexNDArray retval (dv); + FloatComplex *out = retval.fortran_vec (); + octave_idx_type howmany = numel() / dv(0) / dv(1); + octave_idx_type dist = dv(0) * dv(1); + + for (octave_idx_type i=0; i < howmany; i++) + octave_fftw::fftNd (in + i*dist, out + i*dist, 2, dv2); + + return retval; +} + +FloatComplexNDArray +FloatComplexNDArray::ifourier2d (void) const +{ + dim_vector dv = dims(); + if (dv.length () < 2) + return FloatComplexNDArray (); + + dim_vector dv2(dv(0), dv(1)); + const FloatComplex *in = fortran_vec (); + FloatComplexNDArray retval (dv); + FloatComplex *out = retval.fortran_vec (); + octave_idx_type howmany = numel() / dv(0) / dv(1); + octave_idx_type dist = dv(0) * dv(1); + + for (octave_idx_type i=0; i < howmany; i++) + octave_fftw::ifftNd (in + i*dist, out + i*dist, 2, dv2); + + return retval; +} + +FloatComplexNDArray +FloatComplexNDArray::fourierNd (void) const +{ + dim_vector dv = dims (); + int rank = dv.length (); + + const FloatComplex *in (fortran_vec ()); + FloatComplexNDArray retval (dv); + FloatComplex *out (retval.fortran_vec ()); + + octave_fftw::fftNd (in, out, rank, dv); + + return retval; +} + +FloatComplexNDArray +FloatComplexNDArray::ifourierNd (void) const +{ + dim_vector dv = dims (); + int rank = dv.length (); + + const FloatComplex *in (fortran_vec ()); + 
FloatComplexNDArray retval (dv); + FloatComplex *out (retval.fortran_vec ()); + + octave_fftw::ifftNd (in, out, rank, dv); + + return retval; +} + +#else +FloatComplexNDArray +FloatComplexNDArray::fourier (int dim) const +{ + dim_vector dv = dims (); + + if (dim > dv.length () || dim < 0) + return FloatComplexNDArray (); + + FloatComplexNDArray retval (dv); + octave_idx_type npts = dv(dim); + octave_idx_type nn = 4*npts+15; + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + OCTAVE_LOCAL_BUFFER (FloatComplex, tmp, npts); + + octave_idx_type stride = 1; + + for (int i = 0; i < dim; i++) + stride *= dv(i); + + octave_idx_type howmany = numel () / npts; + howmany = (stride == 1 ? howmany : (howmany > stride ? stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); + octave_idx_type dist = (stride == 1 ? npts : 1); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type k = 0; k < nloop; k++) + { + for (octave_idx_type j = 0; j < howmany; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type i = 0; i < npts; i++) + tmp[i] = elem((i + k*npts)*stride + j*dist); + + F77_FUNC (cfftf, CFFTF) (npts, tmp, pwsave); + + for (octave_idx_type i = 0; i < npts; i++) + retval ((i + k*npts)*stride + j*dist) = tmp[i]; + } + } + + return retval; +} + +FloatComplexNDArray +FloatComplexNDArray::ifourier (int dim) const +{ + dim_vector dv = dims (); + + if (dim > dv.length () || dim < 0) + return FloatComplexNDArray (); + + FloatComplexNDArray retval (dv); + octave_idx_type npts = dv(dim); + octave_idx_type nn = 4*npts+15; + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + OCTAVE_LOCAL_BUFFER (FloatComplex, tmp, npts); + + octave_idx_type stride = 1; + + for (int i = 0; i < dim; i++) + stride *= dv(i); + + octave_idx_type howmany = numel () / npts; + howmany = (stride == 1 ? howmany : (howmany > stride ? stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 
1 : numel () / npts / stride); + octave_idx_type dist = (stride == 1 ? npts : 1); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type k = 0; k < nloop; k++) + { + for (octave_idx_type j = 0; j < howmany; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type i = 0; i < npts; i++) + tmp[i] = elem((i + k*npts)*stride + j*dist); + + F77_FUNC (cfftb, CFFTB) (npts, tmp, pwsave); + + for (octave_idx_type i = 0; i < npts; i++) + retval ((i + k*npts)*stride + j*dist) = tmp[i] / + static_cast (npts); + } + } + + return retval; +} + +FloatComplexNDArray +FloatComplexNDArray::fourier2d (void) const +{ + dim_vector dv = dims (); + dim_vector dv2 (dv(0), dv(1)); + int rank = 2; + FloatComplexNDArray retval (*this); + octave_idx_type stride = 1; + + for (int i = 0; i < rank; i++) + { + octave_idx_type npts = dv2(i); + octave_idx_type nn = 4*npts+15; + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + Array row (npts); + FloatComplex *prow = row.fortran_vec (); + + octave_idx_type howmany = numel () / npts; + howmany = (stride == 1 ? howmany : + (howmany > stride ? stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); + octave_idx_type dist = (stride == 1 ? 
npts : 1); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type k = 0; k < nloop; k++) + { + for (octave_idx_type j = 0; j < howmany; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type l = 0; l < npts; l++) + prow[l] = retval ((l + k*npts)*stride + j*dist); + + F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); + + for (octave_idx_type l = 0; l < npts; l++) + retval ((l + k*npts)*stride + j*dist) = prow[l]; + } + } + + stride *= dv2(i); + } + + return retval; +} + +FloatComplexNDArray +FloatComplexNDArray::ifourier2d (void) const +{ + dim_vector dv = dims(); + dim_vector dv2 (dv(0), dv(1)); + int rank = 2; + FloatComplexNDArray retval (*this); + octave_idx_type stride = 1; + + for (int i = 0; i < rank; i++) + { + octave_idx_type npts = dv2(i); + octave_idx_type nn = 4*npts+15; + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + Array row (npts); + FloatComplex *prow = row.fortran_vec (); + + octave_idx_type howmany = numel () / npts; + howmany = (stride == 1 ? howmany : + (howmany > stride ? stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); + octave_idx_type dist = (stride == 1 ? 
npts : 1); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type k = 0; k < nloop; k++) + { + for (octave_idx_type j = 0; j < howmany; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type l = 0; l < npts; l++) + prow[l] = retval ((l + k*npts)*stride + j*dist); + + F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); + + for (octave_idx_type l = 0; l < npts; l++) + retval ((l + k*npts)*stride + j*dist) = prow[l] / + static_cast (npts); + } + } + + stride *= dv2(i); + } + + return retval; +} + +FloatComplexNDArray +FloatComplexNDArray::fourierNd (void) const +{ + dim_vector dv = dims (); + int rank = dv.length (); + FloatComplexNDArray retval (*this); + octave_idx_type stride = 1; + + for (int i = 0; i < rank; i++) + { + octave_idx_type npts = dv(i); + octave_idx_type nn = 4*npts+15; + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + Array row (npts); + FloatComplex *prow = row.fortran_vec (); + + octave_idx_type howmany = numel () / npts; + howmany = (stride == 1 ? howmany : + (howmany > stride ? stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); + octave_idx_type dist = (stride == 1 ? 
npts : 1); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type k = 0; k < nloop; k++) + { + for (octave_idx_type j = 0; j < howmany; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type l = 0; l < npts; l++) + prow[l] = retval ((l + k*npts)*stride + j*dist); + + F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); + + for (octave_idx_type l = 0; l < npts; l++) + retval ((l + k*npts)*stride + j*dist) = prow[l]; + } + } + + stride *= dv(i); + } + + return retval; +} + +FloatComplexNDArray +FloatComplexNDArray::ifourierNd (void) const +{ + dim_vector dv = dims (); + int rank = dv.length (); + FloatComplexNDArray retval (*this); + octave_idx_type stride = 1; + + for (int i = 0; i < rank; i++) + { + octave_idx_type npts = dv(i); + octave_idx_type nn = 4*npts+15; + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + Array row (npts); + FloatComplex *prow = row.fortran_vec (); + + octave_idx_type howmany = numel () / npts; + howmany = (stride == 1 ? howmany : + (howmany > stride ? stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); + octave_idx_type dist = (stride == 1 ? npts : 1); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type k = 0; k < nloop; k++) + { + for (octave_idx_type j = 0; j < howmany; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type l = 0; l < npts; l++) + prow[l] = retval ((l + k*npts)*stride + j*dist); + + F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); + + for (octave_idx_type l = 0; l < npts; l++) + retval ((l + k*npts)*stride + j*dist) = prow[l] / + static_cast (npts); + } + } + + stride *= dv(i); + } + + return retval; +} + +#endif + +// unary operations + +boolNDArray +FloatComplexNDArray::operator ! (void) const +{ + boolNDArray b (dims ()); + + for (octave_idx_type i = 0; i < length (); i++) + b.elem (i) = elem (i) == static_cast (0.0); + + return b; +} + +// FIXME -- this is not quite the right thing. 
+ +bool +FloatComplexNDArray::any_element_is_inf_or_nan (void) const +{ + octave_idx_type nel = nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + { + FloatComplex val = elem (i); + if (xisinf (val) || xisnan (val)) + return true; + } + return false; +} + +// Return true if no elements have imaginary components. + +bool +FloatComplexNDArray::all_elements_are_real (void) const +{ + octave_idx_type nel = nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + { + float ip = std::imag (elem (i)); + + if (ip != 0.0 || lo_ieee_signbit (ip)) + return false; + } + + return true; +} + +// Return nonzero if any element of CM has a non-integer real or +// imaginary part. Also extract the largest and smallest (real or +// imaginary) values and return them in MAX_VAL and MIN_VAL. + +bool +FloatComplexNDArray::all_integers (float& max_val, float& min_val) const +{ + octave_idx_type nel = nelem (); + + if (nel > 0) + { + FloatComplex val = elem (0); + + float r_val = std::real (val); + float i_val = std::imag (val); + + max_val = r_val; + min_val = r_val; + + if (i_val > max_val) + max_val = i_val; + + if (i_val < max_val) + min_val = i_val; + } + else + return false; + + for (octave_idx_type i = 0; i < nel; i++) + { + FloatComplex val = elem (i); + + float r_val = std::real (val); + float i_val = std::imag (val); + + if (r_val > max_val) + max_val = r_val; + + if (i_val > max_val) + max_val = i_val; + + if (r_val < min_val) + min_val = r_val; + + if (i_val < min_val) + min_val = i_val; + + if (D_NINT (r_val) != r_val || D_NINT (i_val) != i_val) + return false; + } + + return true; +} + +bool +FloatComplexNDArray::too_large_for_float (void) const +{ + octave_idx_type nel = nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + { + FloatComplex val = elem (i); + + float r_val = std::real (val); + float i_val = std::imag (val); + + if ((! (xisnan (r_val) || xisinf (r_val)) + && fabs (r_val) > FLT_MAX) + || (! 
(xisnan (i_val) || xisinf (i_val)) + && fabs (i_val) > FLT_MAX)) + return true; + } + + return false; +} + +boolNDArray +FloatComplexNDArray::all (int dim) const +{ + MX_ND_ANY_ALL_REDUCTION + (MX_ND_ALL_EVAL (elem (iter_idx) == FloatComplex (0, 0)), true); +} + +boolNDArray +FloatComplexNDArray::any (int dim) const +{ + MX_ND_ANY_ALL_REDUCTION + (MX_ND_ANY_EVAL (elem (iter_idx) != FloatComplex (0, 0) + && ! (lo_ieee_isnan (std::real (elem (iter_idx))) + || lo_ieee_isnan (std::imag (elem (iter_idx))))), + false); +} + +FloatComplexNDArray +FloatComplexNDArray::cumprod (int dim) const +{ + MX_ND_CUMULATIVE_OP (FloatComplexNDArray, FloatComplex, FloatComplex (1, 0), *); +} + +FloatComplexNDArray +FloatComplexNDArray::cumsum (int dim) const +{ + MX_ND_CUMULATIVE_OP (FloatComplexNDArray, FloatComplex, FloatComplex (0, 0), +); +} + +FloatComplexNDArray +FloatComplexNDArray::prod (int dim) const +{ + MX_ND_REDUCTION (retval(result_idx) *= elem (iter_idx), FloatComplex (1, 0), FloatComplexNDArray); +} + +FloatComplexNDArray +FloatComplexNDArray::sumsq (int dim) const +{ + MX_ND_REDUCTION (retval(result_idx) += std::imag (elem (iter_idx)) + ? 
elem (iter_idx) * conj (elem (iter_idx)) + : std::pow (elem (iter_idx), 2), FloatComplex (0, 0), FloatComplexNDArray); +} + +FloatComplexNDArray +FloatComplexNDArray::sum (int dim) const +{ + MX_ND_REDUCTION (retval(result_idx) += elem (iter_idx), FloatComplex (0, 0), FloatComplexNDArray); +} + +FloatComplexNDArray +FloatComplexNDArray::concat (const FloatComplexNDArray& rb, const Array& ra_idx) +{ + if (rb.numel () > 0) + insert (rb, ra_idx); + return *this; +} + +FloatComplexNDArray +FloatComplexNDArray::concat (const FloatNDArray& rb, const Array& ra_idx) +{ + FloatComplexNDArray tmp (rb); + if (rb.numel () > 0) + insert (tmp, ra_idx); + return *this; +} + +FloatComplexNDArray +concat (NDArray& ra, FloatComplexNDArray& rb, const Array& ra_idx) +{ + FloatComplexNDArray retval (ra); + if (rb.numel () > 0) + retval.insert (rb, ra_idx); + return retval; +} + +static const FloatComplex FloatComplex_NaN_result (octave_Float_NaN, octave_Float_NaN); + +FloatComplexNDArray +FloatComplexNDArray::max (int dim) const +{ + ArrayN dummy_idx; + return max (dummy_idx, dim); +} + +FloatComplexNDArray +FloatComplexNDArray::max (ArrayN& idx_arg, int dim) const +{ + dim_vector dv = dims (); + dim_vector dr = dims (); + + if (dv.numel () == 0 || dim > dv.length () || dim < 0) + return FloatComplexNDArray (); + + dr(dim) = 1; + + FloatComplexNDArray result (dr); + idx_arg.resize (dr); + + octave_idx_type x_stride = 1; + octave_idx_type x_len = dv(dim); + for (int i = 0; i < dim; i++) + x_stride *= dv(i); + + for (octave_idx_type i = 0; i < dr.numel (); i++) + { + octave_idx_type x_offset; + if (x_stride == 1) + x_offset = i * x_len; + else + { + octave_idx_type x_offset2 = 0; + x_offset = i; + while (x_offset >= x_stride) + { + x_offset -= x_stride; + x_offset2++; + } + x_offset += x_offset2 * x_stride * x_len; + } + + octave_idx_type idx_j; + + FloatComplex tmp_max; + + float abs_max = octave_Float_NaN; + + for (idx_j = 0; idx_j < x_len; idx_j++) + { + tmp_max = elem (idx_j * 
x_stride + x_offset); + + if (! xisnan (tmp_max)) + { + abs_max = std::abs(tmp_max); + break; + } + } + + for (octave_idx_type j = idx_j+1; j < x_len; j++) + { + FloatComplex tmp = elem (j * x_stride + x_offset); + + if (xisnan (tmp)) + continue; + + float abs_tmp = std::abs (tmp); + + if (abs_tmp > abs_max) + { + idx_j = j; + tmp_max = tmp; + abs_max = abs_tmp; + } + } + + if (xisnan (tmp_max)) + { + result.elem (i) = FloatComplex_NaN_result; + idx_arg.elem (i) = 0; + } + else + { + result.elem (i) = tmp_max; + idx_arg.elem (i) = idx_j; + } + } + + result.chop_trailing_singletons (); + idx_arg.chop_trailing_singletons (); + + return result; +} + +FloatComplexNDArray +FloatComplexNDArray::min (int dim) const +{ + ArrayN dummy_idx; + return min (dummy_idx, dim); +} + +FloatComplexNDArray +FloatComplexNDArray::min (ArrayN& idx_arg, int dim) const +{ + dim_vector dv = dims (); + dim_vector dr = dims (); + + if (dv.numel () == 0 || dim > dv.length () || dim < 0) + return FloatComplexNDArray (); + + dr(dim) = 1; + + FloatComplexNDArray result (dr); + idx_arg.resize (dr); + + octave_idx_type x_stride = 1; + octave_idx_type x_len = dv(dim); + for (int i = 0; i < dim; i++) + x_stride *= dv(i); + + for (octave_idx_type i = 0; i < dr.numel (); i++) + { + octave_idx_type x_offset; + if (x_stride == 1) + x_offset = i * x_len; + else + { + octave_idx_type x_offset2 = 0; + x_offset = i; + while (x_offset >= x_stride) + { + x_offset -= x_stride; + x_offset2++; + } + x_offset += x_offset2 * x_stride * x_len; + } + + octave_idx_type idx_j; + + FloatComplex tmp_min; + + float abs_min = octave_Float_NaN; + + for (idx_j = 0; idx_j < x_len; idx_j++) + { + tmp_min = elem (idx_j * x_stride + x_offset); + + if (! 
xisnan (tmp_min)) + { + abs_min = std::abs(tmp_min); + break; + } + } + + for (octave_idx_type j = idx_j+1; j < x_len; j++) + { + FloatComplex tmp = elem (j * x_stride + x_offset); + + if (xisnan (tmp)) + continue; + + float abs_tmp = std::abs (tmp); + + if (abs_tmp < abs_min) + { + idx_j = j; + tmp_min = tmp; + abs_min = abs_tmp; + } + } + + if (xisnan (tmp_min)) + { + result.elem (i) = FloatComplex_NaN_result; + idx_arg.elem (i) = 0; + } + else + { + result.elem (i) = tmp_min; + idx_arg.elem (i) = idx_j; + } + } + + result.chop_trailing_singletons (); + idx_arg.chop_trailing_singletons (); + + return result; +} + +FloatNDArray +FloatComplexNDArray::abs (void) const +{ + FloatNDArray retval (dims ()); + + octave_idx_type nel = nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + retval(i) = std::abs (elem (i)); + + return retval; +} + +FloatComplexNDArray& +FloatComplexNDArray::insert (const NDArray& a, octave_idx_type r, octave_idx_type c) +{ + dim_vector a_dv = a.dims (); + + int n = a_dv.length (); + + if (n == dimensions.length ()) + { + Array a_ra_idx (a_dv.length (), 0); + + a_ra_idx.elem (0) = r; + a_ra_idx.elem (1) = c; + + for (int i = 0; i < n; i++) + { + if (a_ra_idx (i) < 0 || (a_ra_idx (i) + a_dv (i)) > dimensions (i)) + { + (*current_liboctave_error_handler) + ("Array::insert: range error for insert"); + return *this; + } + } + + a_ra_idx.elem (0) = 0; + a_ra_idx.elem (1) = 0; + + octave_idx_type n_elt = a.numel (); + + // IS make_unique () NECCESSARY HERE?? 
+ + for (octave_idx_type i = 0; i < n_elt; i++) + { + Array ra_idx = a_ra_idx; + + ra_idx.elem (0) = a_ra_idx (0) + r; + ra_idx.elem (1) = a_ra_idx (1) + c; + + elem (ra_idx) = a.elem (a_ra_idx); + + increment_index (a_ra_idx, a_dv); + } + } + else + (*current_liboctave_error_handler) + ("Array::insert: invalid indexing operation"); + + return *this; +} + +FloatComplexNDArray& +FloatComplexNDArray::insert (const FloatComplexNDArray& a, octave_idx_type r, octave_idx_type c) +{ + Array::insert (a, r, c); + return *this; +} + +FloatComplexNDArray& +FloatComplexNDArray::insert (const FloatComplexNDArray& a, const Array& ra_idx) +{ + Array::insert (a, ra_idx); + return *this; +} + +FloatComplexMatrix +FloatComplexNDArray::matrix_value (void) const +{ + FloatComplexMatrix retval; + + int nd = ndims (); + + switch (nd) + { + case 1: + retval = FloatComplexMatrix (Array2 (*this, dimensions(0), 1)); + break; + + case 2: + retval = FloatComplexMatrix (Array2 (*this, dimensions(0), + dimensions(1))); + break; + + default: + (*current_liboctave_error_handler) + ("invalid conversion of FloatComplexNDArray to FloatComplexMatrix"); + break; + } + + return retval; +} + +void +FloatComplexNDArray::increment_index (Array& ra_idx, + const dim_vector& dimensions, + int start_dimension) +{ + ::increment_index (ra_idx, dimensions, start_dimension); +} + +octave_idx_type +FloatComplexNDArray::compute_index (Array& ra_idx, + const dim_vector& dimensions) +{ + return ::compute_index (ra_idx, dimensions); +} + +FloatComplexNDArray +FloatComplexNDArray::diag (octave_idx_type k) const +{ + return MArrayN::diag (k); +} + +FloatNDArray +FloatComplexNDArray::map (dmapper fcn) const +{ + return MArrayN::map (func_ptr (fcn)); +} + +FloatComplexNDArray +FloatComplexNDArray::map (cmapper fcn) const +{ + return MArrayN::map (func_ptr (fcn)); +} + +boolNDArray +FloatComplexNDArray::map (bmapper fcn) const +{ + return MArrayN::map (func_ptr (fcn)); +} + +// This contains no information on the array 
structure !!! +std::ostream& +operator << (std::ostream& os, const FloatComplexNDArray& a) +{ + octave_idx_type nel = a.nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + { + os << " "; + octave_write_complex (os, a.elem (i)); + os << "\n"; + } + return os; +} + +std::istream& +operator >> (std::istream& is, FloatComplexNDArray& a) +{ + octave_idx_type nel = a.nelem (); + + if (nel < 1 ) + is.clear (std::ios::badbit); + else + { + FloatComplex tmp; + for (octave_idx_type i = 0; i < nel; i++) + { + tmp = octave_read_complex (is); + if (is) + a.elem (i) = tmp; + else + goto done; + } + } + + done: + + return is; +} + +// FIXME -- it would be nice to share code among the min/max +// functions below. + +#define EMPTY_RETURN_CHECK(T) \ + if (nel == 0) \ + return T (dv); + +FloatComplexNDArray +min (const FloatComplex& c, const FloatComplexNDArray& m) +{ + dim_vector dv = m.dims (); + int nel = dv.numel (); + + EMPTY_RETURN_CHECK (FloatComplexNDArray); + + FloatComplexNDArray result (dv); + + for (int i = 0; i < nel; i++) + { + OCTAVE_QUIT; + result (i) = xmin (c, m (i)); + } + + return result; +} + +FloatComplexNDArray +min (const FloatComplexNDArray& m, const FloatComplex& c) +{ + dim_vector dv = m.dims (); + int nel = dv.numel (); + + EMPTY_RETURN_CHECK (FloatComplexNDArray); + + FloatComplexNDArray result (dv); + + for (int i = 0; i < nel; i++) + { + OCTAVE_QUIT; + result (i) = xmin (c, m (i)); + } + + return result; +} + +FloatComplexNDArray +min (const FloatComplexNDArray& a, const FloatComplexNDArray& b) +{ + dim_vector dv = a.dims (); + int nel = dv.numel (); + + if (dv != b.dims ()) + { + (*current_liboctave_error_handler) + ("two-arg min expecting args of same size"); + return FloatComplexNDArray (); + } + + EMPTY_RETURN_CHECK (FloatComplexNDArray); + + FloatComplexNDArray result (dv); + + for (int i = 0; i < nel; i++) + { + OCTAVE_QUIT; + result (i) = xmin (a (i), b (i)); + } + + return result; +} + +FloatComplexNDArray +max (const FloatComplex& c, 
const FloatComplexNDArray& m) +{ + dim_vector dv = m.dims (); + int nel = dv.numel (); + + EMPTY_RETURN_CHECK (FloatComplexNDArray); + + FloatComplexNDArray result (dv); + + for (int i = 0; i < nel; i++) + { + OCTAVE_QUIT; + result (i) = xmax (c, m (i)); + } + + return result; +} + +FloatComplexNDArray +max (const FloatComplexNDArray& m, const FloatComplex& c) +{ + dim_vector dv = m.dims (); + int nel = dv.numel (); + + EMPTY_RETURN_CHECK (FloatComplexNDArray); + + FloatComplexNDArray result (dv); + + for (int i = 0; i < nel; i++) + { + OCTAVE_QUIT; + result (i) = xmax (c, m (i)); + } + + return result; +} + +FloatComplexNDArray +max (const FloatComplexNDArray& a, const FloatComplexNDArray& b) +{ + dim_vector dv = a.dims (); + int nel = dv.numel (); + + if (dv != b.dims ()) + { + (*current_liboctave_error_handler) + ("two-arg max expecting args of same size"); + return FloatComplexNDArray (); + } + + EMPTY_RETURN_CHECK (FloatComplexNDArray); + + FloatComplexNDArray result (dv); + + for (int i = 0; i < nel; i++) + { + OCTAVE_QUIT; + result (i) = xmax (a (i), b (i)); + } + + return result; +} + +NDS_CMP_OPS(FloatComplexNDArray, std::real, FloatComplex, std::real) +NDS_BOOL_OPS(FloatComplexNDArray, FloatComplex, static_cast (0.0)) + +SND_CMP_OPS(FloatComplex, std::real, FloatComplexNDArray, std::real) +SND_BOOL_OPS(FloatComplex, FloatComplexNDArray, static_cast (0.0)) + +NDND_CMP_OPS(FloatComplexNDArray, std::real, FloatComplexNDArray, std::real) +NDND_BOOL_OPS(FloatComplexNDArray, FloatComplexNDArray, static_cast (0.0)) + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCNDArray.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCNDArray.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,164 @@ +/* + +Copyright (C) 2003, 2004, 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. 
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_FloatComplexNDArray_h) +#define octave_FloatComplexNDArray_h 1 + +#include "MArrayN.h" +#include "fCMatrix.h" + +#include "mx-defs.h" +#include "mx-op-defs.h" + +class +OCTAVE_API +FloatComplexNDArray : public MArrayN +{ +public: + + FloatComplexNDArray (void) : MArrayN () { } + + FloatComplexNDArray (const dim_vector& dv) : MArrayN (dv) { } + + FloatComplexNDArray (const dim_vector& dv, const FloatComplex& val) + : MArrayN (dv, val) { } + + FloatComplexNDArray (const FloatComplexNDArray& a) : MArrayN (a) { } + + FloatComplexNDArray (const FloatComplexMatrix& a) : MArrayN (a) { } + + template + FloatComplexNDArray (const MArrayN& a) : MArrayN (a) { } + + template + FloatComplexNDArray (const ArrayN& a) : MArrayN (a) { } + + FloatComplexNDArray& operator = (const FloatComplexNDArray& a) + { + MArrayN::operator = (a); + return *this; + } + + // unary operations + + boolNDArray operator ! (void) const; + + // FIXME -- this is not quite the right thing. 
+ + bool any_element_is_inf_or_nan (void) const; + bool all_elements_are_real (void) const; + bool all_integers (float& max_val, float& min_val) const; + bool too_large_for_float (void) const; + + boolNDArray all (int dim = -1) const; + boolNDArray any (int dim = -1) const; + + FloatComplexNDArray cumprod (int dim = -1) const; + FloatComplexNDArray cumsum (int dim = -1) const; + FloatComplexNDArray prod (int dim = -1) const; + FloatComplexNDArray sum (int dim = -1) const; + FloatComplexNDArray sumsq (int dim = -1) const; + FloatComplexNDArray concat (const FloatComplexNDArray& rb, const Array& ra_idx); + FloatComplexNDArray concat (const FloatNDArray& rb, const Array& ra_idx); + + FloatComplexNDArray max (int dim = 0) const; + FloatComplexNDArray max (ArrayN& index, int dim = 0) const; + FloatComplexNDArray min (int dim = 0) const; + FloatComplexNDArray min (ArrayN& index, int dim = 0) const; + FloatComplexNDArray& insert (const NDArray& a, octave_idx_type r, octave_idx_type c); + FloatComplexNDArray& insert (const FloatComplexNDArray& a, octave_idx_type r, octave_idx_type c); + FloatComplexNDArray& insert (const FloatComplexNDArray& a, const Array& ra_idx); + + FloatNDArray abs (void) const; + + FloatComplexNDArray fourier (int dim = 1) const; + FloatComplexNDArray ifourier (int dim = 1) const; + + FloatComplexNDArray fourier2d (void) const; + FloatComplexNDArray ifourier2d (void) const; + + FloatComplexNDArray fourierNd (void) const; + FloatComplexNDArray ifourierNd (void) const; + + FloatComplexMatrix matrix_value (void) const; + + FloatComplexNDArray squeeze (void) const { return MArrayN::squeeze (); } + + static void increment_index (Array& ra_idx, + const dim_vector& dimensions, + int start_dimension = 0); + + static octave_idx_type compute_index (Array& ra_idx, + const dim_vector& dimensions); + + // i/o + + friend OCTAVE_API std::ostream& operator << (std::ostream& os, const FloatComplexNDArray& a); + friend OCTAVE_API std::istream& operator >> 
(std::istream& is, FloatComplexNDArray& a); + + static FloatComplex resize_fill_value (void) { return FloatComplex (0.0, 0.0); } + + // bool all_elements_are_real (void) const; + // bool all_integers (float& max_val, float& min_val) const; + + FloatComplexNDArray diag (octave_idx_type k = 0) const; + + typedef float (*dmapper) (const FloatComplex&); + typedef FloatComplex (*cmapper) (const FloatComplex&); + typedef bool (*bmapper) (const FloatComplex&); + + FloatNDArray map (dmapper fcn) const; + FloatComplexNDArray map (cmapper fcn) const; + boolNDArray map (bmapper fcn) const; + +private: + + FloatComplexNDArray (FloatComplex *d, const dim_vector& dv) + : MArrayN (d, dv) { } +}; + +extern OCTAVE_API FloatComplexNDArray min (const FloatComplex& c, const FloatComplexNDArray& m); +extern OCTAVE_API FloatComplexNDArray min (const FloatComplexNDArray& m, const FloatComplex& c); +extern OCTAVE_API FloatComplexNDArray min (const FloatComplexNDArray& a, const FloatComplexNDArray& b); + +extern OCTAVE_API FloatComplexNDArray max (const FloatComplex& c, const FloatComplexNDArray& m); +extern OCTAVE_API FloatComplexNDArray max (const FloatComplexNDArray& m, const FloatComplex& c); +extern OCTAVE_API FloatComplexNDArray max (const FloatComplexNDArray& a, const FloatComplexNDArray& b); + +NDS_CMP_OP_DECLS (FloatComplexNDArray, FloatComplex, OCTAVE_API) +NDS_BOOL_OP_DECLS (FloatComplexNDArray, FloatComplex, OCTAVE_API) + +SND_CMP_OP_DECLS (FloatComplex, FloatComplexNDArray, OCTAVE_API) +SND_BOOL_OP_DECLS (FloatComplex, FloatComplexNDArray, OCTAVE_API) + +NDND_CMP_OP_DECLS (FloatComplexNDArray, FloatComplexNDArray, OCTAVE_API) +NDND_BOOL_OP_DECLS (FloatComplexNDArray, FloatComplexNDArray, OCTAVE_API) + +MARRAY_FORWARD_DEFS (MArrayN, FloatComplexNDArray, FloatComplex) + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCRowVector.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/liboctave/fCRowVector.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,511 @@ +// RowVector manipulations. +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "Array-util.h" +#include "f77-fcn.h" +#include "functor.h" +#include "lo-error.h" +#include "mx-base.h" +#include "mx-inlines.cc" +#include "oct-cmplx.h" + +// Fortran functions we call. 
+ +extern "C" +{ + F77_RET_T + F77_FUNC (cgemv, CGEMV) (F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const FloatComplex&, + const FloatComplex*, const octave_idx_type&, const FloatComplex*, + const octave_idx_type&, const FloatComplex&, FloatComplex*, const octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (xcdotu, XCDOTU) (const octave_idx_type&, const FloatComplex*, const octave_idx_type&, + const FloatComplex*, const octave_idx_type&, FloatComplex&); +} + +// FloatComplex Row Vector class + +FloatComplexRowVector::FloatComplexRowVector (const FloatRowVector& a) + : MArray (a.length ()) +{ + for (octave_idx_type i = 0; i < length (); i++) + elem (i) = a.elem (i); +} + +bool +FloatComplexRowVector::operator == (const FloatComplexRowVector& a) const +{ + octave_idx_type len = length (); + if (len != a.length ()) + return 0; + return mx_inline_equal (data (), a.data (), len); +} + +bool +FloatComplexRowVector::operator != (const FloatComplexRowVector& a) const +{ + return !(*this == a); +} + +// destructive insert/delete/reorder operations + +FloatComplexRowVector& +FloatComplexRowVector::insert (const FloatRowVector& a, octave_idx_type c) +{ + octave_idx_type a_len = a.length (); + + if (c < 0 || c + a_len > length ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (c+i) = a.elem (i); + } + + return *this; +} + +FloatComplexRowVector& +FloatComplexRowVector::insert (const FloatComplexRowVector& a, octave_idx_type c) +{ + octave_idx_type a_len = a.length (); + + if (c < 0 || c + a_len > length ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (c+i) = a.elem (i); + } + + return *this; +} + +FloatComplexRowVector& +FloatComplexRowVector::fill 
(float val) +{ + octave_idx_type len = length (); + + if (len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < len; i++) + xelem (i) = val; + } + + return *this; +} + +FloatComplexRowVector& +FloatComplexRowVector::fill (const FloatComplex& val) +{ + octave_idx_type len = length (); + + if (len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < len; i++) + xelem (i) = val; + } + + return *this; +} + +FloatComplexRowVector& +FloatComplexRowVector::fill (float val, octave_idx_type c1, octave_idx_type c2) +{ + octave_idx_type len = length (); + + if (c1 < 0 || c2 < 0 || c1 >= len || c2 >= len) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + if (c1 > c2) { octave_idx_type tmp = c1; c1 = c2; c2 = tmp; } + + if (c2 >= c1) + { + make_unique (); + + for (octave_idx_type i = c1; i <= c2; i++) + xelem (i) = val; + } + + return *this; +} + +FloatComplexRowVector& +FloatComplexRowVector::fill (const FloatComplex& val, octave_idx_type c1, octave_idx_type c2) +{ + octave_idx_type len = length (); + + if (c1 < 0 || c2 < 0 || c1 >= len || c2 >= len) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + if (c1 > c2) { octave_idx_type tmp = c1; c1 = c2; c2 = tmp; } + + if (c2 >= c1) + { + make_unique (); + + for (octave_idx_type i = c1; i <= c2; i++) + xelem (i) = val; + } + + return *this; +} + +FloatComplexRowVector +FloatComplexRowVector::append (const FloatRowVector& a) const +{ + octave_idx_type len = length (); + octave_idx_type nc_insert = len; + FloatComplexRowVector retval (len + a.length ()); + retval.insert (*this, 0); + retval.insert (a, nc_insert); + return retval; +} + +FloatComplexRowVector +FloatComplexRowVector::append (const FloatComplexRowVector& a) const +{ + octave_idx_type len = length (); + octave_idx_type nc_insert = len; + FloatComplexRowVector retval (len + a.length ()); + retval.insert (*this, 0); + retval.insert (a, nc_insert); + return retval; +} + 
+FloatComplexColumnVector +FloatComplexRowVector::hermitian (void) const +{ + return MArray::hermitian (std::conj); +} + +FloatComplexColumnVector +FloatComplexRowVector::transpose (void) const +{ + return MArray::transpose (); +} + +FloatComplexRowVector +conj (const FloatComplexRowVector& a) +{ + octave_idx_type a_len = a.length (); + FloatComplexRowVector retval; + if (a_len > 0) + retval = FloatComplexRowVector (mx_inline_conj_dup (a.data (), a_len), a_len); + return retval; +} + +// resize is the destructive equivalent for this one + +FloatComplexRowVector +FloatComplexRowVector::extract (octave_idx_type c1, octave_idx_type c2) const +{ + if (c1 > c2) { octave_idx_type tmp = c1; c1 = c2; c2 = tmp; } + + octave_idx_type new_c = c2 - c1 + 1; + + FloatComplexRowVector result (new_c); + + for (octave_idx_type i = 0; i < new_c; i++) + result.elem (i) = elem (c1+i); + + return result; +} + +FloatComplexRowVector +FloatComplexRowVector::extract_n (octave_idx_type r1, octave_idx_type n) const +{ + FloatComplexRowVector result (n); + + for (octave_idx_type i = 0; i < n; i++) + result.elem (i) = elem (r1+i); + + return result; +} + +// row vector by row vector -> row vector operations + +FloatComplexRowVector& +FloatComplexRowVector::operator += (const FloatRowVector& a) +{ + octave_idx_type len = length (); + + octave_idx_type a_len = a.length (); + + if (len != a_len) + { + gripe_nonconformant ("operator +=", len, a_len); + return *this; + } + + if (len == 0) + return *this; + + FloatComplex *d = fortran_vec (); // Ensures only one reference to my privates! 
+ + mx_inline_add2 (d, a.data (), len); + return *this; +} + +FloatComplexRowVector& +FloatComplexRowVector::operator -= (const FloatRowVector& a) +{ + octave_idx_type len = length (); + + octave_idx_type a_len = a.length (); + + if (len != a_len) + { + gripe_nonconformant ("operator -=", len, a_len); + return *this; + } + + if (len == 0) + return *this; + + FloatComplex *d = fortran_vec (); // Ensures only one reference to my privates! + + mx_inline_subtract2 (d, a.data (), len); + return *this; +} + +// row vector by matrix -> row vector + +FloatComplexRowVector +operator * (const FloatComplexRowVector& v, const FloatComplexMatrix& a) +{ + FloatComplexRowVector retval; + + octave_idx_type len = v.length (); + + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (a_nr != len) + gripe_nonconformant ("operator *", 1, len, a_nr, a_nc); + else + { + if (len == 0) + retval.resize (a_nc, 0.0); + else + { + // Transpose A to form A'*x == (x'*A)' + + octave_idx_type ld = a_nr; + + retval.resize (a_nc); + FloatComplex *y = retval.fortran_vec (); + + F77_XFCN (cgemv, CGEMV, (F77_CONST_CHAR_ARG2 ("T", 1), + a_nr, a_nc, 1.0, a.data (), + ld, v.data (), 1, 0.0, y, 1 + F77_CHAR_ARG_LEN (1))); + } + } + + return retval; +} + +FloatComplexRowVector +operator * (const FloatRowVector& v, const FloatComplexMatrix& a) +{ + FloatComplexRowVector tmp (v); + return tmp * a; +} + +// other operations + +FloatRowVector +FloatComplexRowVector::map (dmapper fcn) const +{ + return MArray::map (func_ptr (fcn)); +} + +FloatComplexRowVector +FloatComplexRowVector::map (cmapper fcn) const +{ + return MArray::map (func_ptr (fcn)); +} + +FloatComplex +FloatComplexRowVector::min (void) const +{ + octave_idx_type len = length (); + if (len == 0) + return FloatComplex (0.0); + + FloatComplex res = elem (0); + float absres = std::abs (res); + + for (octave_idx_type i = 1; i < len; i++) + if (std::abs (elem (i)) < absres) + { + res = elem (i); + absres = std::abs (res); + } + 
+ return res; +} + +FloatComplex +FloatComplexRowVector::max (void) const +{ + octave_idx_type len = length (); + if (len == 0) + return FloatComplex (0.0); + + FloatComplex res = elem (0); + float absres = std::abs (res); + + for (octave_idx_type i = 1; i < len; i++) + if (std::abs (elem (i)) > absres) + { + res = elem (i); + absres = std::abs (res); + } + + return res; +} + +// i/o + +std::ostream& +operator << (std::ostream& os, const FloatComplexRowVector& a) +{ +// int field_width = os.precision () + 7; + for (octave_idx_type i = 0; i < a.length (); i++) + os << " " /* setw (field_width) */ << a.elem (i); + return os; +} + +std::istream& +operator >> (std::istream& is, FloatComplexRowVector& a) +{ + octave_idx_type len = a.length(); + + if (len < 1) + is.clear (std::ios::badbit); + else + { + FloatComplex tmp; + for (octave_idx_type i = 0; i < len; i++) + { + is >> tmp; + if (is) + a.elem (i) = tmp; + else + break; + } + } + return is; +} + +// row vector by column vector -> scalar + +// row vector by column vector -> scalar + +FloatComplex +operator * (const FloatComplexRowVector& v, const FloatColumnVector& a) +{ + FloatComplexColumnVector tmp (a); + return v * tmp; +} + +FloatComplex +operator * (const FloatComplexRowVector& v, const FloatComplexColumnVector& a) +{ + FloatComplex retval (0.0, 0.0); + + octave_idx_type len = v.length (); + + octave_idx_type a_len = a.length (); + + if (len != a_len) + gripe_nonconformant ("operator *", len, a_len); + else if (len != 0) + F77_FUNC (xcdotu, XCDOTU) (len, v.data (), 1, a.data (), 1, retval); + + return retval; +} + +// other operations + +FloatComplexRowVector +linspace (const FloatComplex& x1, const FloatComplex& x2, octave_idx_type n) +{ + FloatComplexRowVector retval; + + if (n > 0) + { + retval.resize (n); + FloatComplex delta = (x2 - x1) / static_cast (n - 1.0); + retval.elem (0) = x1; + for (octave_idx_type i = 1; i < n-1; i++) + retval.elem (i) = x1 + static_cast (1.0) * i * delta; + retval.elem (n-1) = 
x2; + } + else + { + retval.resize (1); + retval.elem (0) = x2; + } + + return retval; +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCRowVector.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCRowVector.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,136 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2003, 2004, 2005, + 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#if !defined (octave_FloatComplexRowVector_h) +#define octave_FloatComplexRowVector_h 1 + +#include "MArray.h" + +#include "mx-defs.h" + +class +OCTAVE_API +FloatComplexRowVector : public MArray +{ +friend class FloatComplexColumnVector; + +public: + + FloatComplexRowVector (void) : MArray () { } + + explicit FloatComplexRowVector (octave_idx_type n) : MArray (n) { } + + FloatComplexRowVector (octave_idx_type n, const FloatComplex& val) : MArray (n, val) { } + + FloatComplexRowVector (const FloatComplexRowVector& a) : MArray (a) { } + + FloatComplexRowVector (const MArray& a) : MArray (a) { } + + explicit FloatComplexRowVector (const FloatRowVector& a); + + FloatComplexRowVector& operator = (const FloatComplexRowVector& a) + { + MArray::operator = (a); + return *this; + } + + bool operator == (const FloatComplexRowVector& a) const; + bool operator != (const FloatComplexRowVector& a) const; + + // destructive insert/delete/reorder operations + + FloatComplexRowVector& insert (const FloatRowVector& a, octave_idx_type c); + FloatComplexRowVector& insert (const FloatComplexRowVector& a, octave_idx_type c); + + FloatComplexRowVector& fill (float val); + FloatComplexRowVector& fill (const FloatComplex& val); + FloatComplexRowVector& fill (float val, octave_idx_type c1, octave_idx_type c2); + FloatComplexRowVector& fill (const FloatComplex& val, octave_idx_type c1, octave_idx_type c2); + + FloatComplexRowVector append (const FloatRowVector& a) const; + FloatComplexRowVector append (const FloatComplexRowVector& a) const; + + FloatComplexColumnVector hermitian (void) const; + FloatComplexColumnVector transpose (void) const; + + friend FloatComplexRowVector conj (const FloatComplexRowVector& a); + + // resize is the destructive equivalent for this one + + FloatComplexRowVector extract (octave_idx_type c1, octave_idx_type c2) const; + + FloatComplexRowVector extract_n (octave_idx_type c1, octave_idx_type n) const; + + // row vector by row vector -> row vector 
operations + + FloatComplexRowVector& operator += (const FloatRowVector& a); + FloatComplexRowVector& operator -= (const FloatRowVector& a); + + // row vector by matrix -> row vector + + friend FloatComplexRowVector operator * (const FloatComplexRowVector& a, + const FloatComplexMatrix& b); + + friend FloatComplexRowVector operator * (const FloatRowVector& a, + const FloatComplexMatrix& b); + + // other operations + + typedef float (*dmapper) (const FloatComplex&); + typedef FloatComplex (*cmapper) (const FloatComplex&); + + FloatRowVector map (dmapper fcn) const; + FloatComplexRowVector map (cmapper fcn) const; + + FloatComplex min (void) const; + FloatComplex max (void) const; + + // i/o + + friend std::ostream& operator << (std::ostream& os, const FloatComplexRowVector& a); + friend std::istream& operator >> (std::istream& is, FloatComplexRowVector& a); + +private: + + FloatComplexRowVector (FloatComplex *d, octave_idx_type l) : MArray (d, l) { } +}; + +// row vector by column vector -> scalar + +FloatComplex operator * (const FloatComplexRowVector& a, const FloatColumnVector& b); + +FloatComplex operator * (const FloatComplexRowVector& a, const FloatComplexColumnVector& b); + +// other operations + +OCTAVE_API FloatComplexRowVector linspace (const FloatComplex& x1, const FloatComplex& x2, octave_idx_type n); + +MARRAY_FORWARD_DEFS (MArray, FloatComplexRowVector, FloatComplex) + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCmplxCHOL.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCmplxCHOL.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,287 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2002, 2003, 2004, 2005, 2007 + John W. Eaton + +This file is part of Octave.
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +// updating/downdating by Jaroslav Hajek 2008 + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "fMatrix.h" +#include "fRowVector.h" +#include "fCmplxCHOL.h" +#include "f77-fcn.h" +#include "lo-error.h" + +extern "C" +{ + F77_RET_T + F77_FUNC (cpotrf, CPOTRF) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + FloatComplex*, const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + F77_RET_T + F77_FUNC (cpotri, CPOTRI) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + FloatComplex*, const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (cpocon, CPOCON) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + FloatComplex*, const octave_idx_type&, const float&, + float&, FloatComplex*, float*, + octave_idx_type& F77_CHAR_ARG_LEN_DECL); + F77_RET_T + F77_FUNC (cch1up, CCH1UP) (const octave_idx_type&, FloatComplex*, FloatComplex*, float*); + + F77_RET_T + F77_FUNC (cch1dn, CCH1DN) (const octave_idx_type&, FloatComplex*, FloatComplex*, float*, + octave_idx_type&); + + F77_RET_T + F77_FUNC (cqrshc, CQRSHC) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, + FloatComplex*, FloatComplex*, const octave_idx_type&, const octave_idx_type&); + + F77_RET_T + F77_FUNC (cchinx, CCHINX) (const octave_idx_type&, const FloatComplex*, FloatComplex*, const octave_idx_type&, + const FloatComplex*, 
octave_idx_type&); + + F77_RET_T + F77_FUNC (cchdex, CCHDEX) (const octave_idx_type&, const FloatComplex*, FloatComplex*, const octave_idx_type&); +} + +octave_idx_type +FloatComplexCHOL::init (const FloatComplexMatrix& a, bool calc_cond) +{ + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (a_nr != a_nc) + { + (*current_liboctave_error_handler) + ("FloatComplexCHOL requires square matrix"); + return -1; + } + + octave_idx_type n = a_nc; + octave_idx_type info; + + chol_mat = a; + FloatComplex *h = chol_mat.fortran_vec (); + + // Calculate the norm of the matrix, for later use. + float anorm = 0; + if (calc_cond) + anorm = chol_mat.abs().sum().row(static_cast(0)).max(); + + F77_XFCN (cpotrf, CPOTRF, (F77_CONST_CHAR_ARG2 ("U", 1), n, h, n, info + F77_CHAR_ARG_LEN (1))); + + xrcond = 0.0; + if (info != 0) + info = -1; + else if (calc_cond) + { + octave_idx_type cpocon_info = 0; + + // Now calculate the condition number for non-singular matrix. + Array z (2*n); + FloatComplex *pz = z.fortran_vec (); + Array rz (n); + float *prz = rz.fortran_vec (); + F77_XFCN (cpocon, CPOCON, (F77_CONST_CHAR_ARG2 ("U", 1), n, h, + n, anorm, xrcond, pz, prz, cpocon_info + F77_CHAR_ARG_LEN (1))); + + if (cpocon_info != 0) + info = -1; + } + else + { + // If someone thinks of a more graceful way of doing this (or + // faster for that matter :-)), please let me know! 
+ + if (n > 1) + for (octave_idx_type j = 0; j < a_nc; j++) + for (octave_idx_type i = j+1; i < a_nr; i++) + chol_mat.xelem (i, j) = 0.0; + } + + return info; +} + +static FloatComplexMatrix +chol2inv_internal (const FloatComplexMatrix& r) +{ + FloatComplexMatrix retval; + + octave_idx_type r_nr = r.rows (); + octave_idx_type r_nc = r.cols (); + + if (r_nr == r_nc) + { + octave_idx_type n = r_nc; + octave_idx_type info; + + FloatComplexMatrix tmp = r; + + F77_XFCN (cpotri, CPOTRI, (F77_CONST_CHAR_ARG2 ("U", 1), n, + tmp.fortran_vec (), n, info + F77_CHAR_ARG_LEN (1))); + + // If someone thinks of a more graceful way of doing this (or + // faster for that matter :-)), please let me know! + + if (n > 1) + for (octave_idx_type j = 0; j < r_nc; j++) + for (octave_idx_type i = j+1; i < r_nr; i++) + tmp.xelem (i, j) = std::conj (tmp.xelem (j, i)); + + retval = tmp; + } + else + (*current_liboctave_error_handler) ("chol2inv requires square matrix"); + + return retval; +} + +// Compute the inverse of a matrix using the Cholesky factorization. 
+FloatComplexMatrix +FloatComplexCHOL::inverse (void) const +{ + return chol2inv_internal (chol_mat); +} + +void +FloatComplexCHOL::set (const FloatComplexMatrix& R) +{ + if (R.is_square ()) + chol_mat = R; + else + (*current_liboctave_error_handler) ("CHOL requires square matrix"); +} + +void +FloatComplexCHOL::update (const FloatComplexMatrix& u) +{ + octave_idx_type n = chol_mat.rows (); + + if (u.length () == n) + { + FloatComplexMatrix tmp = u; + + OCTAVE_LOCAL_BUFFER (float, w, n); + + F77_XFCN (cch1up, CCH1UP, (n, chol_mat.fortran_vec (), + tmp.fortran_vec (), w)); + } + else + (*current_liboctave_error_handler) ("CHOL update dimension mismatch"); +} + +octave_idx_type +FloatComplexCHOL::downdate (const FloatComplexMatrix& u) +{ + octave_idx_type info = -1; + + octave_idx_type n = chol_mat.rows (); + + if (u.length () == n) + { + FloatComplexMatrix tmp = u; + + OCTAVE_LOCAL_BUFFER (float, w, n); + + F77_XFCN (cch1dn, CCH1DN, (n, chol_mat.fortran_vec (), + tmp.fortran_vec (), w, info)); + } + else + (*current_liboctave_error_handler) ("CHOL downdate dimension mismatch"); + + return info; +} + +octave_idx_type +FloatComplexCHOL::insert_sym (const FloatComplexMatrix& u, octave_idx_type j) +{ + octave_idx_type info = -1; + + octave_idx_type n = chol_mat.rows (); + + if (u.length () != n+1) + (*current_liboctave_error_handler) ("CHOL insert dimension mismatch"); + else if (j < 0 || j > n) + (*current_liboctave_error_handler) ("CHOL insert index out of range"); + else + { + FloatComplexMatrix chol_mat1 (n+1, n+1); + + F77_XFCN (cchinx, CCHINX, (n, chol_mat.data (), chol_mat1.fortran_vec (), + j+1, u.data (), info)); + + chol_mat = chol_mat1; + } + + return info; +} + +void +FloatComplexCHOL::delete_sym (octave_idx_type j) +{ + octave_idx_type n = chol_mat.rows (); + + if (j < 0 || j > n-1) + (*current_liboctave_error_handler) ("CHOL delete index out of range"); + else + { + FloatComplexMatrix chol_mat1 (n-1, n-1); + + F77_XFCN (cchdex, CCHDEX, (n, chol_mat.data 
(), chol_mat1.fortran_vec (), j+1)); + + chol_mat = chol_mat1; + } +} + +void +FloatComplexCHOL::shift_sym (octave_idx_type i, octave_idx_type j) +{ + octave_idx_type n = chol_mat.rows (); + FloatComplex dummy; + + if (i < 0 || i > n-1 || j < 0 || j > n-1) + (*current_liboctave_error_handler) ("CHOL shift index out of range"); + else + F77_XFCN (cqrshc, CQRSHC, (0, n, n, &dummy, chol_mat.fortran_vec (), i+1, j+1)); +} + +FloatComplexMatrix +chol2inv (const FloatComplexMatrix& r) +{ + return chol2inv_internal (r); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCmplxCHOL.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCmplxCHOL.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,99 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +// updating/downdating by Jaroslav Hajek 2008 + +#if !defined (octave_FloatComplexCHOL_h) +#define octave_FloatComplexCHOL_h 1 + +#include + +#include "fCMatrix.h" + +class +OCTAVE_API +FloatComplexCHOL +{ +public: + + FloatComplexCHOL (void) : chol_mat () { } + + FloatComplexCHOL (const FloatComplexMatrix& a, bool calc_cond = false) { init (a, calc_cond); } + + FloatComplexCHOL (const FloatComplexMatrix& a, octave_idx_type& info, bool calc_cond = false) + { + info = init (a, calc_cond); + } + + FloatComplexCHOL (const FloatComplexCHOL& a) + : chol_mat (a.chol_mat), xrcond (a.xrcond) { } + + FloatComplexCHOL& operator = (const FloatComplexCHOL& a) + { + if (this != &a) + { + chol_mat = a.chol_mat; + xrcond = a.xrcond; + } + + return *this; + } + + FloatComplexMatrix chol_matrix (void) const { return chol_mat; } + + float rcond (void) const { return xrcond; } + + FloatComplexMatrix inverse (void) const; + + void set (const FloatComplexMatrix& R); + + void update (const FloatComplexMatrix& u); + + octave_idx_type downdate (const FloatComplexMatrix& u); + + octave_idx_type insert_sym (const FloatComplexMatrix& u, octave_idx_type j); + + void delete_sym (octave_idx_type j); + + void shift_sym (octave_idx_type i, octave_idx_type j); + + friend OCTAVE_API std::ostream& operator << (std::ostream& os, const FloatComplexCHOL& a); + +private: + + FloatComplexMatrix chol_mat; + + float xrcond; + + octave_idx_type init (const FloatComplexMatrix& a, bool calc_cond); +}; + +FloatComplexMatrix OCTAVE_API chol2inv (const FloatComplexMatrix& r); + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCmplxDET.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCmplxDET.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,86 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. 
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include "fCmplxDET.h" +#include "lo-mappers.h" +#include "lo-math.h" +#include "oct-cmplx.h" + +bool +FloatComplexDET::value_will_overflow (void) const +{ + return base2 + ? (e2 + 1 > xlog2 (DBL_MAX) ? 1 : 0) + : (e10 + 1 > log10 (DBL_MAX) ? 1 : 0); +} + +bool +FloatComplexDET::value_will_underflow (void) const +{ + return base2 + ? (e2 - 1 < xlog2 (DBL_MIN) ? 1 : 0) + : (e10 - 1 < log10 (DBL_MIN) ? 1 : 0); +} + +void +FloatComplexDET::initialize10 (void) +{ + if (c2 != static_cast (0.0)) + { + float etmp = e2 / xlog2 (static_cast(10)); + e10 = static_cast (xround (etmp)); + etmp -= e10; + c10 = c2 * static_cast (pow (10.0, etmp)); + } +} + +void +FloatComplexDET::initialize2 (void) +{ + if (c10 != static_cast (0.0)) + { + float etmp = e10 / log10 (2.0); + e2 = static_cast (xround (etmp)); + etmp -= e2; + c2 = c10 * xexp2 (etmp); + } +} + +FloatComplex +FloatComplexDET::value (void) const +{ + return base2 ? 
c2 * xexp2 (static_cast(e2)) : c10 * static_cast (pow (10.0, e10)); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCmplxDET.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCmplxDET.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,120 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_FloatComplexDET_h) +#define octave_FloatComplexDET_h 1 + +#include + +#include "oct-cmplx.h" + +// FIXME -- we could use templates here; compare with dbleDET.h + +class +OCTAVE_API +FloatComplexDET +{ +friend class FloatComplexMatrix; + +public: + + FloatComplexDET (void) : c2 (0), c10 (0), e2 (0), e10 (0), base2 (false) { } + + FloatComplexDET (const FloatComplexDET& a) + : c2 (a.c2), c10 (a.c10), e2 (a.e2), e10 (a.e10), base2 (a.base2) + { } + + FloatComplexDET& operator = (const FloatComplexDET& a) + { + if (this != &a) + { + c2 = a.c2; + e2 = a.e2; + + c10 = a.c10; + e10 = a.e10; + + base2 = a.base2; + } + return *this; + } + + bool value_will_overflow (void) const; + bool value_will_underflow (void) const; + + // These two functions were originally defined in base 10, so we are + // preserving that interface here. 
+ + FloatComplex coefficient (void) const { return coefficient10 (); } + int exponent (void) const { return exponent10 (); } + + FloatComplex coefficient10 (void) const { return c10; } + int exponent10 (void) const { return e10; } + + FloatComplex coefficient2 (void) const { return c2; } + int exponent2 (void) const { return e2; } + + FloatComplex value (void) const; + + friend std::ostream& operator << (std::ostream& os, const FloatComplexDET& a); + +private: + + // Constructed this way, we assume base 2. + + FloatComplexDET (const FloatComplex& c, int e) + : c2 (c), c10 (0), e2 (e), e10 (0), base2 (true) + { + initialize10 (); + } + + // Original interface had only this constructor and it was assumed + // to be base 10, so we are preserving that interface here. + + FloatComplexDET (const FloatComplex *d) + : c2 (0), c10 (d[0]), e2 (0), e10 (static_cast (d[1].real ())), + base2 (false) + { + initialize2 (); + } + + void initialize2 (void); + void initialize10 (void); + + FloatComplex c2; + FloatComplex c10; + + int e2; + int e10; + + // TRUE means the original values were provided in base 2. + bool base2; +}; + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCmplxLU.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCmplxLU.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,71 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1999, 2002, 2003, 2004, 2005, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "fCmplxLU.h" +#include "f77-fcn.h" +#include "lo-error.h" + +// Instantiate the base LU class for the types we need. + +#include +#include + +template class base_lu ; + +// Define the constructor for this particular derivation. + +extern "C" +{ + F77_RET_T + F77_FUNC (cgetrf, CGETRF) (const octave_idx_type&, const octave_idx_type&, FloatComplex*, + const octave_idx_type&, octave_idx_type*, octave_idx_type&); +} + +FloatComplexLU::FloatComplexLU (const FloatComplexMatrix& a) +{ + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + octave_idx_type mn = (a_nr < a_nc ? a_nr : a_nc); + + ipvt.resize (mn); + octave_idx_type *pipvt = ipvt.fortran_vec (); + + a_fact = a; + FloatComplex *tmp_data = a_fact.fortran_vec (); + + octave_idx_type info = 0; + + F77_XFCN (cgetrf, CGETRF, (a_nr, a_nc, tmp_data, a_nr, pipvt, info)); + + ipvt -= static_cast (1); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCmplxLU.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCmplxLU.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,62 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2002, 2004, 2005, 2006, 2007 + John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_FloatComplexLU_h) +#define octave_FloatComplex_LU_h 1 + +#include "base-lu.h" +#include "fMatrix.h" +#include "fCMatrix.h" + +class +OCTAVE_API +FloatComplexLU : public base_lu +{ +public: + + FloatComplexLU (void) + : base_lu () { } + + FloatComplexLU (const FloatComplexMatrix& a); + + FloatComplexLU (const FloatComplexLU& a) + : base_lu (a) { } + + FloatComplexLU& operator = (const FloatComplexLU& a) + { + if (this != &a) + base_lu :: operator = (a); + + return *this; + } + + ~FloatComplexLU (void) { } +}; + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCmplxSCHUR.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCmplxSCHUR.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,142 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1999, 2000, 2002, 2003, 2004, + 2005, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "fCmplxSCHUR.h" +#include "f77-fcn.h" +#include "lo-error.h" + +extern "C" +{ + F77_RET_T + F77_FUNC (cgeesx, CGEESX) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + FloatComplexSCHUR::select_function, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, FloatComplex*, const octave_idx_type&, octave_idx_type&, + FloatComplex*, FloatComplex*, const octave_idx_type&, float&, + float&, FloatComplex*, const octave_idx_type&, float*, octave_idx_type*, + octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); +} + +static octave_idx_type +select_ana (const FloatComplex& a) +{ + return a.real () < 0.0; +} + +static octave_idx_type +select_dig (const FloatComplex& a) +{ + return (abs (a) < 1.0); +} + +octave_idx_type +FloatComplexSCHUR::init (const FloatComplexMatrix& a, const std::string& ord, + bool calc_unitary) +{ + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (a_nr != a_nc) + { + (*current_liboctave_error_handler) + ("FloatComplexSCHUR requires square matrix"); + return -1; + } + + // Workspace requirements may need to be fixed if any of the + // following change. + + char jobvs; + char sense = 'N'; + char sort = 'N'; + + if (calc_unitary) + jobvs = 'V'; + else + jobvs = 'N'; + + char ord_char = ord.empty () ? 
'U' : ord[0]; + + if (ord_char == 'A' || ord_char == 'D' || ord_char == 'a' || ord_char == 'd') + sort = 'S'; + + if (ord_char == 'A' || ord_char == 'a') + selector = select_ana; + else if (ord_char == 'D' || ord_char == 'd') + selector = select_dig; + else + selector = 0; + + octave_idx_type n = a_nc; + octave_idx_type lwork = 8 * n; + octave_idx_type info; + octave_idx_type sdim; + float rconde; + float rcondv; + + schur_mat = a; + if (calc_unitary) + unitary_mat.resize (n, n); + + FloatComplex *s = schur_mat.fortran_vec (); + FloatComplex *q = unitary_mat.fortran_vec (); + + Array rwork (n); + float *prwork = rwork.fortran_vec (); + + Array w (n); + FloatComplex *pw = w.fortran_vec (); + + Array work (lwork); + FloatComplex *pwork = work.fortran_vec (); + + // BWORK is not referenced for non-ordered Schur. + Array bwork ((ord_char == 'N' || ord_char == 'n') ? 0 : n); + octave_idx_type *pbwork = bwork.fortran_vec (); + + F77_XFCN (cgeesx, CGEESX, (F77_CONST_CHAR_ARG2 (&jobvs, 1), + F77_CONST_CHAR_ARG2 (&sort, 1), + selector, + F77_CONST_CHAR_ARG2 (&sense, 1), + n, s, n, sdim, pw, q, n, rconde, rcondv, + pwork, lwork, prwork, pbwork, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + return info; +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCmplxSCHUR.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCmplxSCHUR.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,88 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. 
+ +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_FloatComplexSCHUR_h) +#define octave_FloatComplexSCHUR_h 1 + +#include +#include + +#include "fCMatrix.h" + +class +OCTAVE_API +FloatComplexSCHUR +{ +public: + + FloatComplexSCHUR (void) + : schur_mat (), unitary_mat () { } + + FloatComplexSCHUR (const FloatComplexMatrix& a, const std::string& ord, + bool calc_unitary = true) + : schur_mat (), unitary_mat () { init (a, ord, calc_unitary); } + + FloatComplexSCHUR (const FloatComplexMatrix& a, const std::string& ord, octave_idx_type& info, + bool calc_unitary = true) + : schur_mat (), unitary_mat () { info = init (a, ord, calc_unitary); } + + FloatComplexSCHUR (const FloatComplexSCHUR& a) + : schur_mat (a.schur_mat), unitary_mat (a.unitary_mat) { } + + FloatComplexSCHUR& operator = (const FloatComplexSCHUR& a) + { + if (this != &a) + { + schur_mat = a.schur_mat; + unitary_mat = a.unitary_mat; + } + return *this; + } + + ~FloatComplexSCHUR (void) { } + + FloatComplexMatrix schur_matrix (void) const { return schur_mat; } + + FloatComplexMatrix unitary_matrix (void) const { return unitary_mat; } + + friend std::ostream& operator << (std::ostream& os, const FloatComplexSCHUR& a); + + typedef octave_idx_type (*select_function) (const FloatComplex&); + +private: + + FloatComplexMatrix schur_mat; + FloatComplexMatrix unitary_mat; + + select_function selector; + + octave_idx_type init (const FloatComplexMatrix& a, const std::string& ord, bool calc_unitary); +}; + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCmplxSVD.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 
+++ b/liboctave/fCmplxSVD.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,173 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1999, 2002, 2003, 2004, 2005, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "fCmplxSVD.h" +#include "f77-fcn.h" +#include "lo-error.h" + +extern "C" +{ + F77_RET_T + F77_FUNC (cgesvd, CGESVD) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, FloatComplex*, + const octave_idx_type&, float*, FloatComplex*, const octave_idx_type&, + FloatComplex*, const octave_idx_type&, FloatComplex*, const octave_idx_type&, + float*, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); +} + +FloatComplexMatrix +FloatComplexSVD::left_singular_matrix (void) const +{ + if (type_computed == SVD::sigma_only) + { + (*current_liboctave_error_handler) + ("FloatComplexSVD: U not computed because type == SVD::sigma_only"); + return FloatComplexMatrix (); + } + else + return left_sm; +} + +FloatComplexMatrix +FloatComplexSVD::right_singular_matrix (void) const +{ + if (type_computed == SVD::sigma_only) + { + (*current_liboctave_error_handler) + ("FloatComplexSVD: V not computed because type == SVD::sigma_only"); + return FloatComplexMatrix (); + } + else + return right_sm; +} + +octave_idx_type +FloatComplexSVD::init (const FloatComplexMatrix& a, 
SVD::type svd_type) +{ + octave_idx_type info; + + octave_idx_type m = a.rows (); + octave_idx_type n = a.cols (); + + FloatComplexMatrix atmp = a; + FloatComplex *tmp_data = atmp.fortran_vec (); + + octave_idx_type min_mn = m < n ? m : n; + octave_idx_type max_mn = m > n ? m : n; + + char jobu = 'A'; + char jobv = 'A'; + + octave_idx_type ncol_u = m; + octave_idx_type nrow_vt = n; + octave_idx_type nrow_s = m; + octave_idx_type ncol_s = n; + + switch (svd_type) + { + case SVD::economy: + jobu = jobv = 'S'; + ncol_u = nrow_vt = nrow_s = ncol_s = min_mn; + break; + + case SVD::sigma_only: + + // Note: for this case, both jobu and jobv should be 'N', but + // there seems to be a bug in dgesvd from Lapack V2.0. To + // demonstrate the bug, set both jobu and jobv to 'N' and find + // the singular values of [eye(3), eye(3)]. The result is + // [-sqrt(2), -sqrt(2), -sqrt(2)]. + // + // For Lapack 3.0, this problem seems to be fixed. + + jobu = 'N'; + jobv = 'N'; + ncol_u = nrow_vt = 1; + break; + + default: + break; + } + + type_computed = svd_type; + + if (! (jobu == 'N' || jobu == 'O')) + left_sm.resize (m, ncol_u); + + FloatComplex *u = left_sm.fortran_vec (); + + sigma.resize (nrow_s, ncol_s); + float *s_vec = sigma.fortran_vec (); + + if (! (jobv == 'N' || jobv == 'O')) + right_sm.resize (nrow_vt, n); + + FloatComplex *vt = right_sm.fortran_vec (); + + octave_idx_type lrwork = 5*max_mn; + + Array rwork (lrwork); + + // Ask ZGESVD what the dimension of WORK should be. 
+ + octave_idx_type lwork = -1; + + Array work (1); + + F77_XFCN (cgesvd, CGESVD, (F77_CONST_CHAR_ARG2 (&jobu, 1), + F77_CONST_CHAR_ARG2 (&jobv, 1), + m, n, tmp_data, m, s_vec, u, m, vt, + nrow_vt, work.fortran_vec (), lwork, + rwork.fortran_vec (), info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + lwork = static_cast (work(0).real ()); + work.resize (lwork); + + F77_XFCN (cgesvd, CGESVD, (F77_CONST_CHAR_ARG2 (&jobu, 1), + F77_CONST_CHAR_ARG2 (&jobv, 1), + m, n, tmp_data, m, s_vec, u, m, vt, + nrow_vt, work.fortran_vec (), lwork, + rwork.fortran_vec (), info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (! (jobv == 'N' || jobv == 'O')) + right_sm = right_sm.hermitian (); + + return info; +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fCmplxSVD.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fCmplxSVD.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,95 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2003, 2004, 2005, + 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#if !defined (octave_FloatComplexSVD_h) +#define octave_FloatComplexSVD_h 1 + +#include + +#include "fDiagMatrix.h" +#include "fCMatrix.h" +#include "dbleSVD.h" + +class +OCTAVE_API +FloatComplexSVD +{ +public: + + FloatComplexSVD (void) { } + + FloatComplexSVD (const FloatComplexMatrix& a, SVD::type svd_type = SVD::std) + { + init (a, svd_type); + } + + FloatComplexSVD (const FloatComplexMatrix& a, octave_idx_type& info, + SVD::type svd_type = SVD::std) + { + info = init (a, svd_type); + } + + FloatComplexSVD (const FloatComplexSVD& a) + : type_computed (a.type_computed), + sigma (a.sigma), left_sm (a.left_sm), right_sm (a.right_sm) { } + + FloatComplexSVD& operator = (const FloatComplexSVD& a) + { + if (this != &a) + { + type_computed = a.type_computed; + sigma = a.sigma; + left_sm = a.left_sm; + right_sm = a.right_sm; + } + return *this; + } + + ~FloatComplexSVD (void) { } + + FloatDiagMatrix singular_values (void) const { return sigma; } + + FloatComplexMatrix left_singular_matrix (void) const; + + FloatComplexMatrix right_singular_matrix (void) const; + + friend std::ostream& operator << (std::ostream& os, const FloatComplexSVD& a); + +private: + + SVD::type type_computed; + + FloatDiagMatrix sigma; + FloatComplexMatrix left_sm; + FloatComplexMatrix right_sm; + + octave_idx_type init (const FloatComplexMatrix& a, SVD::type svd_type = SVD::std); +}; + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fColVector.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fColVector.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,345 @@ +// ColumnVector manipulations. +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2004, + 2005, 2007 John W. Eaton + +This file is part of Octave. 
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "Array-util.h" +#include "f77-fcn.h" +#include "functor.h" +#include "lo-error.h" +#include "mx-base.h" +#include "mx-inlines.cc" +#include "oct-cmplx.h" + +// Fortran functions we call. + +extern "C" +{ + F77_RET_T + F77_FUNC (sgemv, SGEMV) (F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const float&, + const float*, const octave_idx_type&, const float*, + const octave_idx_type&, const float&, float*, + const octave_idx_type& + F77_CHAR_ARG_LEN_DECL); +} + +// Column Vector class. 
+ +bool +FloatColumnVector::operator == (const FloatColumnVector& a) const +{ + octave_idx_type len = length (); + if (len != a.length ()) + return 0; + return mx_inline_equal (data (), a.data (), len); +} + +bool +FloatColumnVector::operator != (const FloatColumnVector& a) const +{ + return !(*this == a); +} + +FloatColumnVector& +FloatColumnVector::insert (const FloatColumnVector& a, octave_idx_type r) +{ + octave_idx_type a_len = a.length (); + + if (r < 0 || r + a_len > length ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (r+i) = a.elem (i); + } + + return *this; +} + +FloatColumnVector& +FloatColumnVector::fill (float val) +{ + octave_idx_type len = length (); + + if (len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < len; i++) + xelem (i) = val; + } + + return *this; +} + +FloatColumnVector& +FloatColumnVector::fill (float val, octave_idx_type r1, octave_idx_type r2) +{ + octave_idx_type len = length (); + + if (r1 < 0 || r2 < 0 || r1 >= len || r2 >= len) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + if (r1 > r2) { octave_idx_type tmp = r1; r1 = r2; r2 = tmp; } + + if (r2 >= r1) + { + make_unique (); + + for (octave_idx_type i = r1; i <= r2; i++) + xelem (i) = val; + } + + return *this; +} + +FloatColumnVector +FloatColumnVector::stack (const FloatColumnVector& a) const +{ + octave_idx_type len = length (); + octave_idx_type nr_insert = len; + FloatColumnVector retval (len + a.length ()); + retval.insert (*this, 0); + retval.insert (a, nr_insert); + return retval; +} + +FloatRowVector +FloatColumnVector::transpose (void) const +{ + return MArray::transpose(); +} + +FloatColumnVector +real (const FloatComplexColumnVector& a) +{ + octave_idx_type a_len = a.length (); + FloatColumnVector retval; + if (a_len > 0) + retval = FloatColumnVector (mx_inline_real_dup 
(a.data (), a_len), a_len); + return retval; +} + +FloatColumnVector +imag (const FloatComplexColumnVector& a) +{ + octave_idx_type a_len = a.length (); + FloatColumnVector retval; + if (a_len > 0) + retval = FloatColumnVector (mx_inline_imag_dup (a.data (), a_len), a_len); + return retval; +} + +// resize is the destructive equivalent for this one + +FloatColumnVector +FloatColumnVector::extract (octave_idx_type r1, octave_idx_type r2) const +{ + if (r1 > r2) { octave_idx_type tmp = r1; r1 = r2; r2 = tmp; } + + octave_idx_type new_r = r2 - r1 + 1; + + FloatColumnVector result (new_r); + + for (octave_idx_type i = 0; i < new_r; i++) + result.xelem (i) = elem (r1+i); + + return result; +} + +FloatColumnVector +FloatColumnVector::extract_n (octave_idx_type r1, octave_idx_type n) const +{ + FloatColumnVector result (n); + + for (octave_idx_type i = 0; i < n; i++) + result.xelem (i) = elem (r1+i); + + return result; +} + +// matrix by column vector -> column vector operations + +FloatColumnVector +operator * (const FloatMatrix& m, const FloatColumnVector& a) +{ + FloatColumnVector retval; + + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.cols (); + + octave_idx_type a_len = a.length (); + + if (nc != a_len) + gripe_nonconformant ("operator *", nr, nc, a_len, 1); + else + { + if (nr == 0 || nc == 0) + retval.resize (nr, 0.0); + else + { + octave_idx_type ld = nr; + + retval.resize (nr); + float *y = retval.fortran_vec (); + + F77_XFCN (sgemv, SGEMV, (F77_CONST_CHAR_ARG2 ("N", 1), + nr, nc, 1.0, m.data (), ld, + a.data (), 1, 0.0, y, 1 + F77_CHAR_ARG_LEN (1))); + } + } + + return retval; +} + +// diagonal matrix by column vector -> column vector operations + +FloatColumnVector +operator * (const FloatDiagMatrix& m, const FloatColumnVector& a) +{ + FloatColumnVector retval; + + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.cols (); + + octave_idx_type a_len = a.length (); + + if (nc != a_len) + gripe_nonconformant ("operator *", nr, nc, a_len, 1); + 
else + { + if (nr == 0 || nc == 0) + retval.resize (nr, 0.0); + else + { + retval.resize (nr); + + for (octave_idx_type i = 0; i < a_len; i++) + retval.elem (i) = a.elem (i) * m.elem (i, i); + + for (octave_idx_type i = a_len; i < nr; i++) + retval.elem (i) = 0.0; + } + } + + return retval; +} + +// other operations + +FloatColumnVector +FloatColumnVector::map (dmapper fcn) const +{ + return MArray::map (func_ptr (fcn)); +} + +FloatComplexColumnVector +FloatColumnVector::map (cmapper fcn) const +{ + return MArray::map (func_ptr (fcn)); +} + +float +FloatColumnVector::min (void) const +{ + octave_idx_type len = length (); + if (len == 0) + return 0.0; + + float res = elem (0); + + for (octave_idx_type i = 1; i < len; i++) + if (elem (i) < res) + res = elem (i); + + return res; +} + +float +FloatColumnVector::max (void) const +{ + octave_idx_type len = length (); + if (len == 0) + return 0.0; + + float res = elem (0); + + for (octave_idx_type i = 1; i < len; i++) + if (elem (i) > res) + res = elem (i); + + return res; +} + +std::ostream& +operator << (std::ostream& os, const FloatColumnVector& a) +{ +// int field_width = os.precision () + 7; + for (octave_idx_type i = 0; i < a.length (); i++) + os << /* setw (field_width) << */ a.elem (i) << "\n"; + return os; +} + +std::istream& +operator >> (std::istream& is, FloatColumnVector& a) +{ + octave_idx_type len = a.length(); + + if (len < 1) + is.clear (std::ios::badbit); + else + { + float tmp; + for (octave_idx_type i = 0; i < len; i++) + { + is >> tmp; + if (is) + a.elem (i) = tmp; + else + break; + } + } + return is; +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fColVector.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fColVector.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,118 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2003, 2004, 2005, + 2006, 2007 John W. Eaton + +This file is part of Octave. 
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_FloatColumnVector_h) +#define octave_FloatColumnVector_h 1 + +#include "MArray.h" + +#include "mx-defs.h" + +class +OCTAVE_API +FloatColumnVector : public MArray +{ +public: + + FloatColumnVector (void) : MArray () { } + + explicit FloatColumnVector (octave_idx_type n) : MArray (n) { } + + FloatColumnVector (octave_idx_type n, float val) : MArray (n, val) { } + + FloatColumnVector (const FloatColumnVector& a) : MArray (a) { } + + FloatColumnVector (const MArray& a) : MArray (a) { } + + FloatColumnVector& operator = (const FloatColumnVector& a) + { + MArray::operator = (a); + return *this; + } + + bool operator == (const FloatColumnVector& a) const; + bool operator != (const FloatColumnVector& a) const; + + // destructive insert/delete/reorder operations + + FloatColumnVector& insert (const FloatColumnVector& a, octave_idx_type r); + + FloatColumnVector& fill (float val); + FloatColumnVector& fill (float val, octave_idx_type r1, octave_idx_type r2); + + FloatColumnVector stack (const FloatColumnVector& a) const; + + FloatRowVector transpose (void) const; + + friend OCTAVE_API FloatColumnVector real (const FloatComplexColumnVector& a); + friend OCTAVE_API FloatColumnVector imag (const FloatComplexColumnVector& a); + + // resize is the destructive equivalent for this one + + FloatColumnVector extract (octave_idx_type r1, octave_idx_type r2) const; + + 
FloatColumnVector extract_n (octave_idx_type r1, octave_idx_type n) const; + + // matrix by column vector -> column vector operations + + friend OCTAVE_API FloatColumnVector operator * (const FloatMatrix& a, const FloatColumnVector& b); + + // diagonal matrix by column vector -> column vector operations + + friend OCTAVE_API FloatColumnVector operator * (const FloatDiagMatrix& a, const FloatColumnVector& b); + + // other operations + + typedef float (*dmapper) (float); + typedef FloatComplex (*cmapper) (const FloatComplex&); + + FloatColumnVector map (dmapper fcn) const; + FloatComplexColumnVector map (cmapper fcn) const; + + float min (void) const; + float max (void) const; + + // i/o + + friend OCTAVE_API std::ostream& operator << (std::ostream& os, const FloatColumnVector& a); + friend OCTAVE_API std::istream& operator >> (std::istream& is, FloatColumnVector& a); + +private: + + FloatColumnVector (float *d, octave_idx_type l) : MArray (d, l) { } +}; + +// Publish externally used friend functions. + +extern OCTAVE_API FloatColumnVector real (const FloatComplexColumnVector& a); +extern OCTAVE_API FloatColumnVector imag (const FloatComplexColumnVector& a); + +MARRAY_FORWARD_DEFS (MArray, FloatColumnVector, float) + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fDiagMatrix.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fDiagMatrix.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,410 @@ +// FloatDiagMatrix manipulations. +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2004, + 2005, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. 
+ +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "Array-util.h" +#include "lo-error.h" +#include "mx-base.h" +#include "mx-inlines.cc" +#include "oct-cmplx.h" + +// Diagonal Matrix class. + +bool +FloatDiagMatrix::operator == (const FloatDiagMatrix& a) const +{ + if (rows () != a.rows () || cols () != a.cols ()) + return 0; + + return mx_inline_equal (data (), a.data (), length ()); +} + +bool +FloatDiagMatrix::operator != (const FloatDiagMatrix& a) const +{ + return !(*this == a); +} + +FloatDiagMatrix& +FloatDiagMatrix::fill (float val) +{ + for (octave_idx_type i = 0; i < length (); i++) + elem (i, i) = val; + return *this; +} + +FloatDiagMatrix& +FloatDiagMatrix::fill (float val, octave_idx_type beg, octave_idx_type end) +{ + if (beg < 0 || end >= length () || end < beg) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = beg; i <= end; i++) + elem (i, i) = val; + + return *this; +} + +FloatDiagMatrix& +FloatDiagMatrix::fill (const FloatColumnVector& a) +{ + octave_idx_type len = length (); + if (a.length () != len) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = 0; i < len; i++) + elem (i, i) = a.elem (i); + + return *this; +} + +FloatDiagMatrix& +FloatDiagMatrix::fill (const FloatRowVector& a) +{ + octave_idx_type len = length (); + if (a.length () != len) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = 0; i < len; i++) + elem (i, i) = a.elem (i); + + return *this; +} + +FloatDiagMatrix& 
+FloatDiagMatrix::fill (const FloatColumnVector& a, octave_idx_type beg) +{ + octave_idx_type a_len = a.length (); + if (beg < 0 || beg + a_len >= length ()) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = 0; i < a_len; i++) + elem (i+beg, i+beg) = a.elem (i); + + return *this; +} + +FloatDiagMatrix& +FloatDiagMatrix::fill (const FloatRowVector& a, octave_idx_type beg) +{ + octave_idx_type a_len = a.length (); + if (beg < 0 || beg + a_len >= length ()) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + for (octave_idx_type i = 0; i < a_len; i++) + elem (i+beg, i+beg) = a.elem (i); + + return *this; +} + +FloatDiagMatrix +real (const FloatComplexDiagMatrix& a) +{ + FloatDiagMatrix retval; + octave_idx_type a_len = a.length (); + if (a_len > 0) + retval = FloatDiagMatrix (mx_inline_real_dup (a.data (), a_len), a.rows (), + a.cols ()); + return retval; +} + +FloatDiagMatrix +imag (const FloatComplexDiagMatrix& a) +{ + FloatDiagMatrix retval; + octave_idx_type a_len = a.length (); + if (a_len > 0) + retval = FloatDiagMatrix (mx_inline_imag_dup (a.data (), a_len), a.rows (), + a.cols ()); + return retval; +} + +FloatMatrix +FloatDiagMatrix::extract (octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) const +{ + if (r1 > r2) { octave_idx_type tmp = r1; r1 = r2; r2 = tmp; } + if (c1 > c2) { octave_idx_type tmp = c1; c1 = c2; c2 = tmp; } + + octave_idx_type new_r = r2 - r1 + 1; + octave_idx_type new_c = c2 - c1 + 1; + + FloatMatrix result (new_r, new_c); + + for (octave_idx_type j = 0; j < new_c; j++) + for (octave_idx_type i = 0; i < new_r; i++) + result.elem (i, j) = elem (r1+i, c1+j); + + return result; +} + +// extract row or column i. 
+
+FloatRowVector
+FloatDiagMatrix::row (octave_idx_type i) const
+{
+  octave_idx_type r = rows ();
+  octave_idx_type c = cols ();
+  if (i < 0 || i >= r)
+    {
+      (*current_liboctave_error_handler) ("invalid row selection");
+      return FloatRowVector ();
+    }
+
+  FloatRowVector retval (c, 0.0);
+  if (r <= c || (r > c && i < c))
+    retval.elem (i) = elem (i, i);
+
+  return retval;
+}
+
+FloatRowVector
+FloatDiagMatrix::row (char *s) const
+{
+  if (! s)
+    {
+      (*current_liboctave_error_handler) ("invalid row selection");
+      return FloatRowVector ();
+    }
+
+  char c = *s;
+  if (c == 'f' || c == 'F')
+    return row (static_cast<octave_idx_type> (0));
+  else if (c == 'l' || c == 'L')
+    return row (rows () - 1);
+  else
+    {
+      (*current_liboctave_error_handler) ("invalid row selection");
+      return FloatRowVector ();
+    }
+}
+
+FloatColumnVector
+FloatDiagMatrix::column (octave_idx_type i) const
+{
+  octave_idx_type r = rows ();
+  octave_idx_type c = cols ();
+  if (i < 0 || i >= c)
+    {
+      (*current_liboctave_error_handler) ("invalid column selection");
+      return FloatColumnVector ();
+    }
+
+  FloatColumnVector retval (r, 0.0);
+  if (r >= c || (r < c && i < r))
+    retval.elem (i) = elem (i, i);
+
+  return retval;
+}
+
+FloatColumnVector
+FloatDiagMatrix::column (char *s) const
+{
+  if (! s)
+    {
+      (*current_liboctave_error_handler) ("invalid column selection");
+      return FloatColumnVector ();
+    }
+
+  char c = *s;
+  if (c == 'f' || c == 'F')
+    return column (static_cast<octave_idx_type> (0));
+  else if (c == 'l' || c == 'L')
+    return column (cols () - 1);
+  else
+    {
+      (*current_liboctave_error_handler) ("invalid column selection");
+      return FloatColumnVector ();
+    }
+}
+
+FloatDiagMatrix
+FloatDiagMatrix::inverse (void) const
+{
+  int info;
+  return inverse (info);
+}
+
+FloatDiagMatrix
+FloatDiagMatrix::inverse (int &info) const
+{
+  octave_idx_type r = rows ();
+  octave_idx_type c = cols ();
+  octave_idx_type len = length ();
+  if (r != c)
+    {
+      (*current_liboctave_error_handler) ("inverse requires square matrix");
+      return FloatDiagMatrix ();
+    }
+
+  FloatDiagMatrix retval (r, c);
+
+  info = 0;
+  for (octave_idx_type i = 0; i < len; i++)
+    {
+      if (elem (i, i) == 0.0)
+        {
+          info = -1;
+          return *this;
+        }
+      else
+        retval.elem (i, i) = 1.0 / elem (i, i);
+    }
+
+  return retval;
+}
+
+// diagonal matrix by diagonal matrix -> diagonal matrix operations
+
+
+
+FloatDiagMatrix
+operator * (const FloatDiagMatrix& a, const FloatDiagMatrix& b)
+{
+  octave_idx_type a_nr = a.rows ();
+  octave_idx_type a_nc = a.cols ();
+
+  octave_idx_type b_nr = b.rows ();
+  octave_idx_type b_nc = b.cols ();
+
+  if (a_nc != b_nr)
+    {
+      gripe_nonconformant ("operator *", a_nr, a_nc, b_nr, b_nc);
+      return FloatDiagMatrix ();
+    }
+
+  if (a_nr == 0 || a_nc == 0 || b_nc == 0)
+    return FloatDiagMatrix (a_nr, a_nc, 0.0);
+
+  FloatDiagMatrix c (a_nr, b_nc);
+
+  octave_idx_type len = a_nr < b_nc ? a_nr : b_nc;
+
+  for (octave_idx_type i = 0; i < len; i++)
+    {
+      float a_element = a.elem (i, i);
+      float b_element = b.elem (i, i);
+
+      if (a_element == 0.0 || b_element == 0.0)
+        c.elem (i, i) = 0.0;
+      else if (a_element == 1.0)
+        c.elem (i, i) = b_element;
+      else if (b_element == 1.0)
+        c.elem (i, i) = a_element;
+      else
+        c.elem (i, i) = a_element * b_element;
+    }
+
+  return c;
+}
+
+// other operations
+
+FloatColumnVector
+FloatDiagMatrix::diag (octave_idx_type k) const
+{
+  octave_idx_type nnr = rows ();
+  octave_idx_type nnc = cols ();
+
+  // Adjust the effective dimensions for the requested super-/sub-diagonal.
+
+  if (k > 0)
+    nnc -= k;
+  else if (k < 0)
+    nnr += k;
+
+  FloatColumnVector d;
+
+  if (nnr > 0 && nnc > 0)
+    {
+      octave_idx_type ndiag = (nnr < nnc) ? nnr : nnc;
+
+      d.resize (ndiag);
+
+      if (k > 0)
+        {
+          for (octave_idx_type i = 0; i < ndiag; i++)
+            d.elem (i) = elem (i, i+k);
+        }
+      else if (k < 0)
+        {
+          for (octave_idx_type i = 0; i < ndiag; i++)
+            d.elem (i) = elem (i-k, i);
+        }
+      else
+        {
+          for (octave_idx_type i = 0; i < ndiag; i++)
+            d.elem (i) = elem (i, i);
+        }
+    }
+  else
+    (*current_liboctave_error_handler)
+      ("diag: requested diagonal out of range");
+
+  return d;
+}
+
+std::ostream&
+operator << (std::ostream& os, const FloatDiagMatrix& a)
+{
+//  int field_width = os.precision () + 7;
+
+  for (octave_idx_type i = 0; i < a.rows (); i++)
+    {
+      for (octave_idx_type j = 0; j < a.cols (); j++)
+        {
+          if (i == j)
+            os << " " /* setw (field_width) */ << a.elem (i, i);
+          else
+            os << " " /* setw (field_width) */ << 0.0;
+        }
+      os << "\n";
+    }
+  return os;
+}
+
+/*
+;;; Local Variables: ***
+;;; mode: C++ ***
+;;; End: ***
+*/
diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fDiagMatrix.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/liboctave/fDiagMatrix.h	Sun Apr 27 22:34:17 2008 +0200
@@ -0,0 +1,119 @@
+/*
+
+Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2004, 2005, 2006,
+              2007 John W. Eaton
+
+This file is part of Octave.
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_FloatDiagMatrix_h) +#define octave_FloatDiagMatrix_h 1 + +#include "MDiagArray2.h" + +#include "fRowVector.h" +#include "fColVector.h" + +#include "mx-defs.h" + +class +OCTAVE_API +FloatDiagMatrix : public MDiagArray2 +{ +friend class FloatSVD; +friend class FloatComplexSVD; + +public: + + FloatDiagMatrix (void) : MDiagArray2 () { } + + FloatDiagMatrix (octave_idx_type r, octave_idx_type c) : MDiagArray2 (r, c) { } + + FloatDiagMatrix (octave_idx_type r, octave_idx_type c, float val) : MDiagArray2 (r, c, val) { } + + FloatDiagMatrix (const FloatDiagMatrix& a) : MDiagArray2 (a) { } + + FloatDiagMatrix (const MDiagArray2& a) : MDiagArray2 (a) { } + + explicit FloatDiagMatrix (const FloatRowVector& a) : MDiagArray2 (a) { } + + explicit FloatDiagMatrix (const FloatColumnVector& a) : MDiagArray2 (a) { } + + FloatDiagMatrix& operator = (const FloatDiagMatrix& a) + { + MDiagArray2::operator = (a); + return *this; + } + + bool operator == (const FloatDiagMatrix& a) const; + bool operator != (const FloatDiagMatrix& a) const; + + FloatDiagMatrix& fill (float val); + FloatDiagMatrix& fill (float val, octave_idx_type beg, octave_idx_type end); + FloatDiagMatrix& fill (const FloatColumnVector& a); + FloatDiagMatrix& fill (const FloatRowVector& a); + FloatDiagMatrix& fill (const FloatColumnVector& a, octave_idx_type beg); + FloatDiagMatrix& fill (const 
FloatRowVector& a, octave_idx_type beg); + + FloatDiagMatrix transpose (void) const { return MDiagArray2::transpose(); } + + friend OCTAVE_API FloatDiagMatrix real (const FloatComplexDiagMatrix& a); + friend OCTAVE_API FloatDiagMatrix imag (const FloatComplexDiagMatrix& a); + + // resize is the destructive analog for this one + + FloatMatrix extract (octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) const; + + // extract row or column i. + + FloatRowVector row (octave_idx_type i) const; + FloatRowVector row (char *s) const; + + FloatColumnVector column (octave_idx_type i) const; + FloatColumnVector column (char *s) const; + + FloatDiagMatrix inverse (void) const; + FloatDiagMatrix inverse (int& info) const; + + // other operations + + FloatColumnVector diag (octave_idx_type k = 0) const; + + // i/o + + friend OCTAVE_API std::ostream& operator << (std::ostream& os, const FloatDiagMatrix& a); + +private: + + FloatDiagMatrix (float *d, octave_idx_type nr, octave_idx_type nc) : MDiagArray2 (d, nr, nc) { } +}; + +// diagonal matrix by diagonal matrix -> diagonal matrix operations + +FloatDiagMatrix +operator * (const FloatDiagMatrix& a, const FloatDiagMatrix& b); + +MDIAGARRAY2_FORWARD_DEFS (MDiagArray2, FloatDiagMatrix, float) + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fEIG.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fEIG.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,398 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2002, 2003, 2004, + 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. 
+ +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "fEIG.h" +#include "fColVector.h" +#include "f77-fcn.h" +#include "lo-error.h" + +extern "C" +{ + F77_RET_T + F77_FUNC (sgeev, SGEEV) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, float*, const octave_idx_type&, float*, + float*, float*, const octave_idx_type&, float*, + const octave_idx_type&, float*, const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (cgeev, CGEEV) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, FloatComplex*, const octave_idx_type&, FloatComplex*, + FloatComplex*, const octave_idx_type&, FloatComplex*, const octave_idx_type&, + FloatComplex*, const octave_idx_type&, float*, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (ssyev, SSYEV) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, float*, const octave_idx_type&, float*, + float*, const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (cheev, CHEEV) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, FloatComplex*, const octave_idx_type&, float*, + FloatComplex*, const octave_idx_type&, float*, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); +} + +octave_idx_type +FloatEIG::init (const FloatMatrix& a, bool calc_ev) +{ + if (a.any_element_is_inf_or_nan ()) + { + (*current_liboctave_error_handler) + ("EIG: matrix contains Inf or NaN values"); + return -1; + } + + if 
(a.is_symmetric ()) + return symmetric_init (a, calc_ev); + + octave_idx_type n = a.rows (); + + if (n != a.cols ()) + { + (*current_liboctave_error_handler) ("EIG requires square matrix"); + return -1; + } + + octave_idx_type info = 0; + + FloatMatrix atmp = a; + float *tmp_data = atmp.fortran_vec (); + + Array wr (n); + float *pwr = wr.fortran_vec (); + + Array wi (n); + float *pwi = wi.fortran_vec (); + + volatile octave_idx_type nvr = calc_ev ? n : 0; + FloatMatrix vr (nvr, nvr); + float *pvr = vr.fortran_vec (); + + octave_idx_type lwork = -1; + float dummy_work; + + float *dummy = 0; + octave_idx_type idummy = 1; + + F77_XFCN (sgeev, SGEEV, (F77_CONST_CHAR_ARG2 ("N", 1), + F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1), + n, tmp_data, n, pwr, pwi, dummy, + idummy, pvr, n, &dummy_work, lwork, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (info == 0) + { + lwork = static_cast (dummy_work); + Array work (lwork); + float *pwork = work.fortran_vec (); + + F77_XFCN (sgeev, SGEEV, (F77_CONST_CHAR_ARG2 ("N", 1), + F77_CONST_CHAR_ARG2 (calc_ev ? 
"V" : "N", 1), + n, tmp_data, n, pwr, pwi, dummy, + idummy, pvr, n, pwork, lwork, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (info < 0) + { + (*current_liboctave_error_handler) ("unrecoverable error in sgeev"); + return info; + } + + if (info > 0) + { + (*current_liboctave_error_handler) ("sgeev failed to converge"); + return info; + } + + lambda.resize (n); + v.resize (nvr, nvr); + + for (octave_idx_type j = 0; j < n; j++) + { + if (wi.elem (j) == 0.0) + { + lambda.elem (j) = FloatComplex (wr.elem (j)); + for (octave_idx_type i = 0; i < nvr; i++) + v.elem (i, j) = vr.elem (i, j); + } + else + { + if (j+1 >= n) + { + (*current_liboctave_error_handler) ("EIG: internal error"); + return -1; + } + + lambda.elem(j) = FloatComplex (wr.elem(j), wi.elem(j)); + lambda.elem(j+1) = FloatComplex (wr.elem(j+1), wi.elem(j+1)); + + for (octave_idx_type i = 0; i < nvr; i++) + { + float real_part = vr.elem (i, j); + float imag_part = vr.elem (i, j+1); + v.elem (i, j) = FloatComplex (real_part, imag_part); + v.elem (i, j+1) = FloatComplex (real_part, -imag_part); + } + j++; + } + } + } + else + (*current_liboctave_error_handler) ("sgeev workspace query failed"); + + return info; +} + +octave_idx_type +FloatEIG::symmetric_init (const FloatMatrix& a, bool calc_ev) +{ + octave_idx_type n = a.rows (); + + if (n != a.cols ()) + { + (*current_liboctave_error_handler) ("EIG requires square matrix"); + return -1; + } + + octave_idx_type info = 0; + + FloatMatrix atmp = a; + float *tmp_data = atmp.fortran_vec (); + + FloatColumnVector wr (n); + float *pwr = wr.fortran_vec (); + + octave_idx_type lwork = -1; + float dummy_work; + + F77_XFCN (ssyev, SSYEV, (F77_CONST_CHAR_ARG2 (calc_ev ? 
"V" : "N", 1), + F77_CONST_CHAR_ARG2 ("U", 1), + n, tmp_data, n, pwr, &dummy_work, lwork, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (info == 0) + { + lwork = static_cast (dummy_work); + Array work (lwork); + float *pwork = work.fortran_vec (); + + F77_XFCN (ssyev, SSYEV, (F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1), + F77_CONST_CHAR_ARG2 ("U", 1), + n, tmp_data, n, pwr, pwork, lwork, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (info < 0) + { + (*current_liboctave_error_handler) ("unrecoverable error in ssyev"); + return info; + } + + if (info > 0) + { + (*current_liboctave_error_handler) ("ssyev failed to converge"); + return info; + } + + lambda = FloatComplexColumnVector (wr); + v = calc_ev ? FloatComplexMatrix (atmp) : FloatComplexMatrix (); + } + else + (*current_liboctave_error_handler) ("ssyev workspace query failed"); + + return info; +} + +octave_idx_type +FloatEIG::init (const FloatComplexMatrix& a, bool calc_ev) +{ + if (a.any_element_is_inf_or_nan ()) + { + (*current_liboctave_error_handler) + ("EIG: matrix contains Inf or NaN values"); + return -1; + } + + if (a.is_hermitian ()) + return hermitian_init (a, calc_ev); + + octave_idx_type n = a.rows (); + + if (n != a.cols ()) + { + (*current_liboctave_error_handler) ("EIG requires square matrix"); + return -1; + } + + octave_idx_type info = 0; + + FloatComplexMatrix atmp = a; + FloatComplex *tmp_data = atmp.fortran_vec (); + + FloatComplexColumnVector w (n); + FloatComplex *pw = w.fortran_vec (); + + octave_idx_type nvr = calc_ev ? n : 0; + FloatComplexMatrix vtmp (nvr, nvr); + FloatComplex *pv = vtmp.fortran_vec (); + + octave_idx_type lwork = -1; + FloatComplex dummy_work; + + octave_idx_type lrwork = 2*n; + Array rwork (lrwork); + float *prwork = rwork.fortran_vec (); + + FloatComplex *dummy = 0; + octave_idx_type idummy = 1; + + F77_XFCN (cgeev, CGEEV, (F77_CONST_CHAR_ARG2 ("N", 1), + F77_CONST_CHAR_ARG2 (calc_ev ? 
"V" : "N", 1), + n, tmp_data, n, pw, dummy, idummy, + pv, n, &dummy_work, lwork, prwork, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (info == 0) + { + lwork = static_cast (dummy_work.real ()); + Array work (lwork); + FloatComplex *pwork = work.fortran_vec (); + + F77_XFCN (cgeev, CGEEV, (F77_CONST_CHAR_ARG2 ("N", 1), + F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1), + n, tmp_data, n, pw, dummy, idummy, + pv, n, pwork, lwork, prwork, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (info < 0) + { + (*current_liboctave_error_handler) ("unrecoverable error in cgeev"); + return info; + } + + if (info > 0) + { + (*current_liboctave_error_handler) ("cgeev failed to converge"); + return info; + } + + lambda = w; + v = vtmp; + } + else + (*current_liboctave_error_handler) ("cgeev workspace query failed"); + + return info; +} + +octave_idx_type +FloatEIG::hermitian_init (const FloatComplexMatrix& a, bool calc_ev) +{ + octave_idx_type n = a.rows (); + + if (n != a.cols ()) + { + (*current_liboctave_error_handler) ("EIG requires square matrix"); + return -1; + } + + octave_idx_type info = 0; + + FloatComplexMatrix atmp = a; + FloatComplex *tmp_data = atmp.fortran_vec (); + + FloatColumnVector wr (n); + float *pwr = wr.fortran_vec (); + + octave_idx_type lwork = -1; + FloatComplex dummy_work; + + octave_idx_type lrwork = 3*n; + Array rwork (lrwork); + float *prwork = rwork.fortran_vec (); + + F77_XFCN (cheev, CHEEV, (F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1), + F77_CONST_CHAR_ARG2 ("U", 1), + n, tmp_data, n, pwr, &dummy_work, lwork, + prwork, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (info == 0) + { + lwork = static_cast (dummy_work.real ()); + Array work (lwork); + FloatComplex *pwork = work.fortran_vec (); + + F77_XFCN (cheev, CHEEV, (F77_CONST_CHAR_ARG2 (calc_ev ? 
"V" : "N", 1), + F77_CONST_CHAR_ARG2 ("U", 1), + n, tmp_data, n, pwr, pwork, lwork, prwork, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (info < 0) + { + (*current_liboctave_error_handler) ("unrecoverable error in cheev"); + return info; + } + + if (info > 0) + { + (*current_liboctave_error_handler) ("cheev failed to converge"); + return info; + } + + lambda = FloatComplexColumnVector (wr); + v = calc_ev ? FloatComplexMatrix (atmp) : FloatComplexMatrix (); + } + else + (*current_liboctave_error_handler) ("cheev workspace query failed"); + + return info; +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fEIG.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fEIG.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,96 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#if !defined (octave_float_EIG_h) +#define octave_float_EIG_h 1 + +#include + +#include "fMatrix.h" +#include "fCMatrix.h" +#include "fCColVector.h" + +class +OCTAVE_API +FloatEIG +{ +friend class FloatMatrix; +friend class FloatComplexMatrix; + +public: + + FloatEIG (void) + : lambda (), v () { } + + FloatEIG (const FloatMatrix& a, bool calc_eigenvectors = true) + { init (a, calc_eigenvectors); } + + FloatEIG (const FloatMatrix& a, octave_idx_type& info, bool calc_eigenvectors = true) + { info = init (a, calc_eigenvectors); } + + FloatEIG (const FloatComplexMatrix& a, bool calc_eigenvectors = true) + { init (a, calc_eigenvectors); } + + FloatEIG (const FloatComplexMatrix& a, octave_idx_type& info, bool calc_eigenvectors = true) + { info = init (a, calc_eigenvectors); } + + FloatEIG (const FloatEIG& a) + : lambda (a.lambda), v (a.v) { } + + FloatEIG& operator = (const FloatEIG& a) + { + if (this != &a) + { + lambda = a.lambda; + v = a.v; + } + return *this; + } + + ~FloatEIG (void) { } + + FloatComplexColumnVector eigenvalues (void) const { return lambda; } + + FloatComplexMatrix eigenvectors (void) const { return v; } + + friend std::ostream& operator << (std::ostream& os, const FloatEIG& a); + +private: + + FloatComplexColumnVector lambda; + FloatComplexMatrix v; + + octave_idx_type init (const FloatMatrix& a, bool calc_eigenvectors); + octave_idx_type init (const FloatComplexMatrix& a, bool calc_eigenvectors); + + octave_idx_type symmetric_init (const FloatMatrix& a, bool calc_eigenvectors); + octave_idx_type hermitian_init (const FloatComplexMatrix& a, bool calc_eigenvectors); +}; + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fMatrix.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fMatrix.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,3406 @@ +// Matrix manipulations. 
+/* + +Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, + 2003, 2004, 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include +#include + +#include "Array-util.h" +#include "byte-swap.h" +#include "fMatrix.h" +#include "floatDET.h" +#include "floatSCHUR.h" +#include "floatSVD.h" +#include "floatCHOL.h" +#include "f77-fcn.h" +#include "functor.h" +#include "lo-error.h" +#include "lo-ieee.h" +#include "lo-mappers.h" +#include "lo-utils.h" +#include "mx-base.h" +#include "mx-fm-fdm.h" +#include "mx-fdm-fm.h" +#include "mx-inlines.cc" +#include "oct-cmplx.h" +#include "quit.h" + +#if defined (HAVE_FFTW3) +#include "oct-fftw.h" +#endif + +// Fortran functions we call. 
+ +extern "C" +{ + F77_RET_T + F77_FUNC (xilaenv, XILAENV) (const octave_idx_type&, F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, + const octave_idx_type&, const octave_idx_type&, + octave_idx_type& + F77_CHAR_ARG_LEN_DECL F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (sgebal, SGEBAL) (F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, float*, const octave_idx_type&, octave_idx_type&, + octave_idx_type&, float*, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (sgebak, SGEBAK) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, float*, + const octave_idx_type&, float*, const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + + F77_RET_T + F77_FUNC (sgemm, SGEMM) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, + const float&, const float*, const octave_idx_type&, + const float*, const octave_idx_type&, const float&, + float*, const octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (sgemv, SGEMV) (F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const float&, + const float*, const octave_idx_type&, const float*, + const octave_idx_type&, const float&, float*, + const octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (xsdot, XSDOT) (const octave_idx_type&, const float*, const octave_idx_type&, + const float*, const octave_idx_type&, float&); + + F77_RET_T + F77_FUNC (sgetrf, SGETRF) (const octave_idx_type&, const octave_idx_type&, float*, const octave_idx_type&, + octave_idx_type*, octave_idx_type&); + + F77_RET_T + F77_FUNC (sgetrs, SGETRS) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, const octave_idx_type&, + const float*, const octave_idx_type&, + const octave_idx_type*, float*, const octave_idx_type&, 
octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (sgetri, SGETRI) (const octave_idx_type&, float*, const octave_idx_type&, const octave_idx_type*, + float*, const octave_idx_type&, octave_idx_type&); + + F77_RET_T + F77_FUNC (sgecon, SGECON) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, float*, + const octave_idx_type&, const float&, float&, + float*, octave_idx_type*, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (sgelsy, SGELSY) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, + float*, const octave_idx_type&, float*, + const octave_idx_type&, octave_idx_type*, float&, octave_idx_type&, + float*, const octave_idx_type&, octave_idx_type&); + + F77_RET_T + F77_FUNC (sgelsd, SGELSD) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, + float*, const octave_idx_type&, float*, + const octave_idx_type&, float*, float&, octave_idx_type&, + float*, const octave_idx_type&, octave_idx_type*, + octave_idx_type&); + + F77_RET_T + F77_FUNC (spotrf, SPOTRF) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + float *, const octave_idx_type&, + octave_idx_type& F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (spocon, SPOCON) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + float*, const octave_idx_type&, const float&, + float&, float*, octave_idx_type*, + octave_idx_type& F77_CHAR_ARG_LEN_DECL); + F77_RET_T + F77_FUNC (spotrs, SPOTRS) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + const octave_idx_type&, const float*, + const octave_idx_type&, float*, + const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (strtri, STRTRI) (F77_CONST_CHAR_ARG_DECL, F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const float*, + const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + F77_RET_T + F77_FUNC (strcon, STRCON) (F77_CONST_CHAR_ARG_DECL, F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + 
const float*, const octave_idx_type&, float&, + float*, octave_idx_type*, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + F77_RET_T + F77_FUNC (strtrs, STRTRS) (F77_CONST_CHAR_ARG_DECL, F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + const octave_idx_type&, const float*, + const octave_idx_type&, float*, + const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + // Note that the original complex fft routines were not written for + // float complex arguments. They have been modified by adding an + // implicit float precision (a-h,o-z) statement at the beginning of + // each subroutine. + + F77_RET_T + F77_FUNC (cffti, CFFTI) (const octave_idx_type&, FloatComplex*); + + F77_RET_T + F77_FUNC (cfftf, CFFTF) (const octave_idx_type&, FloatComplex*, FloatComplex*); + + F77_RET_T + F77_FUNC (cfftb, CFFTB) (const octave_idx_type&, FloatComplex*, FloatComplex*); + + F77_RET_T + F77_FUNC (slartg, SLARTG) (const float&, const float&, float&, + float&, float&); + + F77_RET_T + F77_FUNC (strsyl, STRSYL) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, + const float*, const octave_idx_type&, const float*, + const octave_idx_type&, const float*, const octave_idx_type&, + float&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (xslange, XSLANGE) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + const octave_idx_type&, const float*, + const octave_idx_type&, float*, float& + F77_CHAR_ARG_LEN_DECL); +} + +// Matrix class. 
+ +FloatMatrix::FloatMatrix (const FloatRowVector& rv) + : MArray2 (1, rv.length (), 0.0) +{ + for (octave_idx_type i = 0; i < rv.length (); i++) + elem (0, i) = rv.elem (i); +} + +FloatMatrix::FloatMatrix (const FloatColumnVector& cv) + : MArray2 (cv.length (), 1, 0.0) +{ + for (octave_idx_type i = 0; i < cv.length (); i++) + elem (i, 0) = cv.elem (i); +} + +FloatMatrix::FloatMatrix (const FloatDiagMatrix& a) + : MArray2 (a.rows (), a.cols (), 0.0) +{ + for (octave_idx_type i = 0; i < a.length (); i++) + elem (i, i) = a.elem (i, i); +} + +// FIXME -- could we use a templated mixed-type copy function +// here? + +FloatMatrix::FloatMatrix (const boolMatrix& a) + : MArray2 (a.rows (), a.cols ()) +{ + for (octave_idx_type i = 0; i < a.rows (); i++) + for (octave_idx_type j = 0; j < a.cols (); j++) + elem (i, j) = a.elem (i, j); +} + +FloatMatrix::FloatMatrix (const charMatrix& a) + : MArray2 (a.rows (), a.cols ()) +{ + for (octave_idx_type i = 0; i < a.rows (); i++) + for (octave_idx_type j = 0; j < a.cols (); j++) + elem (i, j) = a.elem (i, j); +} + +bool +FloatMatrix::operator == (const FloatMatrix& a) const +{ + if (rows () != a.rows () || cols () != a.cols ()) + return false; + + return mx_inline_equal (data (), a.data (), length ()); +} + +bool +FloatMatrix::operator != (const FloatMatrix& a) const +{ + return !(*this == a); +} + +bool +FloatMatrix::is_symmetric (void) const +{ + if (is_square () && rows () > 0) + { + for (octave_idx_type i = 0; i < rows (); i++) + for (octave_idx_type j = i+1; j < cols (); j++) + if (elem (i, j) != elem (j, i)) + return false; + + return true; + } + + return false; +} + +FloatMatrix& +FloatMatrix::insert (const FloatMatrix& a, octave_idx_type r, octave_idx_type c) +{ + Array2::insert (a, r, c); + return *this; +} + +FloatMatrix& +FloatMatrix::insert (const FloatRowVector& a, octave_idx_type r, octave_idx_type c) +{ + octave_idx_type a_len = a.length (); + + if (r < 0 || r >= rows () || c < 0 || c + a_len > cols ()) + { + 
(*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (r, c+i) = a.elem (i); + } + + return *this; +} + +FloatMatrix& +FloatMatrix::insert (const FloatColumnVector& a, octave_idx_type r, octave_idx_type c) +{ + octave_idx_type a_len = a.length (); + + if (r < 0 || r + a_len > rows () || c < 0 || c >= cols ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (r+i, c) = a.elem (i); + } + + return *this; +} + +FloatMatrix& +FloatMatrix::insert (const FloatDiagMatrix& a, octave_idx_type r, octave_idx_type c) +{ + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (r < 0 || r + a_nr > rows () || c < 0 || c + a_nc > cols ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + fill (0.0, r, c, r + a_nr - 1, c + a_nc - 1); + + octave_idx_type a_len = a.length (); + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (r+i, c+i) = a.elem (i, i); + } + + return *this; +} + +FloatMatrix& +FloatMatrix::fill (float val) +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + make_unique (); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + xelem (i, j) = val; + } + + return *this; +} + +FloatMatrix& +FloatMatrix::fill (float val, octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (r1 < 0 || r2 < 0 || c1 < 0 || c2 < 0 + || r1 >= nr || r2 >= nr || c1 >= nc || c2 >= nc) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + if (r1 > r2) { octave_idx_type tmp = r1; r1 = r2; r2 = tmp; } + if (c1 > 
c2) { octave_idx_type tmp = c1; c1 = c2; c2 = tmp; } + + if (r2 >= r1 && c2 >= c1) + { + make_unique (); + + for (octave_idx_type j = c1; j <= c2; j++) + for (octave_idx_type i = r1; i <= r2; i++) + xelem (i, j) = val; + } + + return *this; +} + +FloatMatrix +FloatMatrix::append (const FloatMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != a.rows ()) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return FloatMatrix (); + } + + octave_idx_type nc_insert = nc; + FloatMatrix retval (nr, nc + a.cols ()); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatMatrix +FloatMatrix::append (const FloatRowVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != 1) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return FloatMatrix (); + } + + octave_idx_type nc_insert = nc; + FloatMatrix retval (nr, nc + a.length ()); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatMatrix +FloatMatrix::append (const FloatColumnVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != a.length ()) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return FloatMatrix (); + } + + octave_idx_type nc_insert = nc; + FloatMatrix retval (nr, nc + 1); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatMatrix +FloatMatrix::append (const FloatDiagMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nr != a.rows ()) + { + (*current_liboctave_error_handler) ("row dimension mismatch for append"); + return *this; + } + + octave_idx_type nc_insert = nc; + FloatMatrix retval (nr, nc + a.cols ()); + retval.insert (*this, 0, 0); + retval.insert (a, 0, nc_insert); + return retval; +} + +FloatMatrix +FloatMatrix::stack (const 
FloatMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != a.cols ()) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return FloatMatrix (); + } + + octave_idx_type nr_insert = nr; + FloatMatrix retval (nr + a.rows (), nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatMatrix +FloatMatrix::stack (const FloatRowVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != a.length ()) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return FloatMatrix (); + } + + octave_idx_type nr_insert = nr; + FloatMatrix retval (nr + 1, nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatMatrix +FloatMatrix::stack (const FloatColumnVector& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != 1) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return FloatMatrix (); + } + + octave_idx_type nr_insert = nr; + FloatMatrix retval (nr + a.length (), nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatMatrix +FloatMatrix::stack (const FloatDiagMatrix& a) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + if (nc != a.cols ()) + { + (*current_liboctave_error_handler) + ("column dimension mismatch for stack"); + return FloatMatrix (); + } + + octave_idx_type nr_insert = nr; + FloatMatrix retval (nr + a.rows (), nc); + retval.insert (*this, 0, 0); + retval.insert (a, nr_insert, 0); + return retval; +} + +FloatMatrix +real (const FloatComplexMatrix& a) +{ + octave_idx_type a_len = a.length (); + FloatMatrix retval; + if (a_len > 0) + retval = FloatMatrix (mx_inline_real_dup (a.data (), a_len), + a.rows (), a.cols ()); + return retval; +} + +FloatMatrix +imag (const FloatComplexMatrix& a) +{ + octave_idx_type 
a_len = a.length (); + FloatMatrix retval; + if (a_len > 0) + retval = FloatMatrix (mx_inline_imag_dup (a.data (), a_len), + a.rows (), a.cols ()); + return retval; +} + +FloatMatrix +FloatMatrix::extract (octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) const +{ + if (r1 > r2) { octave_idx_type tmp = r1; r1 = r2; r2 = tmp; } + if (c1 > c2) { octave_idx_type tmp = c1; c1 = c2; c2 = tmp; } + + octave_idx_type new_r = r2 - r1 + 1; + octave_idx_type new_c = c2 - c1 + 1; + + FloatMatrix result (new_r, new_c); + + for (octave_idx_type j = 0; j < new_c; j++) + for (octave_idx_type i = 0; i < new_r; i++) + result.xelem (i, j) = elem (r1+i, c1+j); + + return result; +} + +FloatMatrix +FloatMatrix::extract_n (octave_idx_type r1, octave_idx_type c1, octave_idx_type nr, octave_idx_type nc) const +{ + FloatMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + result.xelem (i, j) = elem (r1+i, c1+j); + + return result; +} + +// extract row or column i. 
+ +FloatRowVector +FloatMatrix::row (octave_idx_type i) const +{ + octave_idx_type nc = cols (); + if (i < 0 || i >= rows ()) + { + (*current_liboctave_error_handler) ("invalid row selection"); + return FloatRowVector (); + } + + FloatRowVector retval (nc); + for (octave_idx_type j = 0; j < nc; j++) + retval.xelem (j) = elem (i, j); + + return retval; +} + +FloatColumnVector +FloatMatrix::column (octave_idx_type i) const +{ + octave_idx_type nr = rows (); + if (i < 0 || i >= cols ()) + { + (*current_liboctave_error_handler) ("invalid column selection"); + return FloatColumnVector (); + } + + FloatColumnVector retval (nr); + for (octave_idx_type j = 0; j < nr; j++) + retval.xelem (j) = elem (j, i); + + return retval; +} + +FloatMatrix +FloatMatrix::inverse (void) const +{ + octave_idx_type info; + float rcond; + MatrixType mattype (*this); + return inverse (mattype, info, rcond, 0, 0); +} + +FloatMatrix +FloatMatrix::inverse (octave_idx_type& info) const +{ + float rcond; + MatrixType mattype (*this); + return inverse (mattype, info, rcond, 0, 0); +} + +FloatMatrix +FloatMatrix::inverse (octave_idx_type& info, float& rcond, int force, + int calc_cond) const +{ + MatrixType mattype (*this); + return inverse (mattype, info, rcond, force, calc_cond); +} + +FloatMatrix +FloatMatrix::inverse (MatrixType& mattype) const +{ + octave_idx_type info; + float rcond; + return inverse (mattype, info, rcond, 0, 0); +} + +FloatMatrix +FloatMatrix::inverse (MatrixType &mattype, octave_idx_type& info) const +{ + float rcond; + return inverse (mattype, info, rcond, 0, 0); +} + +FloatMatrix +FloatMatrix::tinverse (MatrixType &mattype, octave_idx_type& info, float& rcond, + int force, int calc_cond) const +{ + FloatMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr != nc || nr == 0 || nc == 0) + (*current_liboctave_error_handler) ("inverse requires square matrix"); + else + { + int typ = mattype.type (); + char uplo = (typ == MatrixType::Lower ? 
'L' : 'U'); + char udiag = 'N'; + retval = *this; + float *tmp_data = retval.fortran_vec (); + + F77_XFCN (strtri, STRTRI, (F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&udiag, 1), + nr, tmp_data, nr, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + // Throw-away extra info LAPACK gives so as to not change output. + rcond = 0.0; + if (info != 0) + info = -1; + else if (calc_cond) + { + octave_idx_type dtrcon_info = 0; + char job = '1'; + + OCTAVE_LOCAL_BUFFER (float, work, 3 * nr); + OCTAVE_LOCAL_BUFFER (octave_idx_type, iwork, nr); + + F77_XFCN (strcon, STRCON, (F77_CONST_CHAR_ARG2 (&job, 1), + F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&udiag, 1), + nr, tmp_data, nr, rcond, + work, iwork, dtrcon_info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (dtrcon_info != 0) + info = -1; + } + + if (info == -1 && ! force) + retval = *this; // Restore matrix contents. + } + + return retval; +} + + +FloatMatrix +FloatMatrix::finverse (MatrixType &mattype, octave_idx_type& info, float& rcond, + int force, int calc_cond) const +{ + FloatMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr != nc || nr == 0 || nc == 0) + (*current_liboctave_error_handler) ("inverse requires square matrix"); + else + { + Array ipvt (nr); + octave_idx_type *pipvt = ipvt.fortran_vec (); + + retval = *this; + float *tmp_data = retval.fortran_vec (); + + Array z(1); + octave_idx_type lwork = -1; + + // Query the optimum work array size. + F77_XFCN (sgetri, SGETRI, (nc, tmp_data, nr, pipvt, + z.fortran_vec (), lwork, info)); + + lwork = static_cast (z(0)); + lwork = (lwork < 2 *nc ? 2*nc : lwork); + z.resize (lwork); + float *pz = z.fortran_vec (); + + info = 0; + + // Calculate the norm of the matrix, for later use. 
+ float anorm = 0; + if (calc_cond) + anorm = retval.abs().sum().row(static_cast(0)).max(); + + F77_XFCN (sgetrf, SGETRF, (nc, nc, tmp_data, nr, pipvt, info)); + + // Throw-away extra info LAPACK gives so as to not change output. + rcond = 0.0; + if (info != 0) + info = -1; + else if (calc_cond) + { + octave_idx_type dgecon_info = 0; + + // Now calculate the condition number for non-singular matrix. + char job = '1'; + Array iz (nc); + octave_idx_type *piz = iz.fortran_vec (); + F77_XFCN (sgecon, SGECON, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, tmp_data, nr, anorm, + rcond, pz, piz, dgecon_info + F77_CHAR_ARG_LEN (1))); + + if (dgecon_info != 0) + info = -1; + } + + if (info == -1 && ! force) + retval = *this; // Restore matrix contents. + else + { + octave_idx_type dgetri_info = 0; + + F77_XFCN (sgetri, SGETRI, (nc, tmp_data, nr, pipvt, + pz, lwork, dgetri_info)); + + if (dgetri_info != 0) + info = -1; + } + + if (info != 0) + mattype.mark_as_rectangular(); + } + + return retval; +} + +FloatMatrix +FloatMatrix::inverse (MatrixType &mattype, octave_idx_type& info, float& rcond, + int force, int calc_cond) const +{ + int typ = mattype.type (false); + FloatMatrix ret; + + if (typ == MatrixType::Unknown) + typ = mattype.type (*this); + + if (typ == MatrixType::Upper || typ == MatrixType::Lower) + ret = tinverse (mattype, info, rcond, force, calc_cond); + else + { + if (mattype.is_hermitian ()) + { + FloatCHOL chol (*this, info, calc_cond); + if (info == 0) + { + if (calc_cond) + rcond = chol.rcond (); + else + rcond = 1.0; + ret = chol.inverse (); + } + else + mattype.mark_as_unsymmetric (); + } + + if (!mattype.is_hermitian ()) + ret = finverse(mattype, info, rcond, force, calc_cond); + + if ((mattype.is_hermitian () || calc_cond) && rcond == 0.) 
+ ret = FloatMatrix (rows (), columns (), octave_Float_Inf); + } + + return ret; +} + +FloatMatrix +FloatMatrix::pseudo_inverse (float tol) const +{ + FloatSVD result (*this, SVD::economy); + + FloatDiagMatrix S = result.singular_values (); + FloatMatrix U = result.left_singular_matrix (); + FloatMatrix V = result.right_singular_matrix (); + + FloatColumnVector sigma = S.diag (); + + octave_idx_type r = sigma.length () - 1; + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (tol <= 0.0) + { + if (nr > nc) + tol = nr * sigma.elem (0) * DBL_EPSILON; + else + tol = nc * sigma.elem (0) * DBL_EPSILON; + } + + while (r >= 0 && sigma.elem (r) < tol) + r--; + + if (r < 0) + return FloatMatrix (nc, nr, 0.0); + else + { + FloatMatrix Ur = U.extract (0, 0, nr-1, r); + FloatDiagMatrix D = FloatDiagMatrix (sigma.extract (0, r)) . inverse (); + FloatMatrix Vr = V.extract (0, 0, nc-1, r); + return Vr * D * Ur.transpose (); + } +} + +#if defined (HAVE_FFTW3) + +FloatComplexMatrix +FloatMatrix::fourier (void) const +{ + size_t nr = rows (); + size_t nc = cols (); + + FloatComplexMatrix retval (nr, nc); + + size_t npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + const float *in (fortran_vec ()); + FloatComplex *out (retval.fortran_vec ()); + + octave_fftw::fft (in, out, npts, nsamples); + + return retval; +} + +FloatComplexMatrix +FloatMatrix::ifourier (void) const +{ + size_t nr = rows (); + size_t nc = cols (); + + FloatComplexMatrix retval (nr, nc); + + size_t npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? 
nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + FloatComplexMatrix tmp (*this); + FloatComplex *in (tmp.fortran_vec ()); + FloatComplex *out (retval.fortran_vec ()); + + octave_fftw::ifft (in, out, npts, nsamples); + + return retval; +} + +FloatComplexMatrix +FloatMatrix::fourier2d (void) const +{ + dim_vector dv(rows (), cols ()); + + const float *in = fortran_vec (); + FloatComplexMatrix retval (rows (), cols ()); + octave_fftw::fftNd (in, retval.fortran_vec (), 2, dv); + + return retval; +} + +FloatComplexMatrix +FloatMatrix::ifourier2d (void) const +{ + dim_vector dv(rows (), cols ()); + + FloatComplexMatrix retval (*this); + FloatComplex *out (retval.fortran_vec ()); + + octave_fftw::ifftNd (out, out, 2, dv); + + return retval; +} + +#else + +FloatComplexMatrix +FloatMatrix::fourier (void) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + octave_idx_type nn = 4*npts+15; + + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + retval = FloatComplexMatrix (*this); + FloatComplex *tmp_data = retval.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + F77_FUNC (cfftf, CFFTF) (npts, &tmp_data[npts*j], pwsave); + } + + return retval; +} + +FloatComplexMatrix +FloatMatrix::ifourier (void) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? 
nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + octave_idx_type nn = 4*npts+15; + + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + retval = FloatComplexMatrix (*this); + FloatComplex *tmp_data = retval.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + F77_FUNC (cfftb, CFFTB) (npts, &tmp_data[npts*j], pwsave); + } + + for (octave_idx_type j = 0; j < npts*nsamples; j++) + tmp_data[j] = tmp_data[j] / static_cast (npts); + + return retval; +} + +FloatComplexMatrix +FloatMatrix::fourier2d (void) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type npts, nsamples; + + if (nr == 1 || nc == 1) + { + npts = nr > nc ? nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + octave_idx_type nn = 4*npts+15; + + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + retval = FloatComplexMatrix (*this); + FloatComplex *tmp_data = retval.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + F77_FUNC (cfftf, CFFTF) (npts, &tmp_data[npts*j], pwsave); + } + + npts = nc; + nsamples = nr; + nn = 4*npts+15; + + wsave.resize (nn); + pwsave = wsave.fortran_vec (); + + Array tmp (npts); + FloatComplex *prow = tmp.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type i = 0; i < npts; i++) + prow[i] = tmp_data[i*nr + j]; + + F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); + + for (octave_idx_type i = 0; i < npts; i++) + tmp_data[i*nr + j] = prow[i]; + } + + return retval; +} + +FloatComplexMatrix +FloatMatrix::ifourier2d (void) const +{ + FloatComplexMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type npts, nsamples; + + if (nr 
== 1 || nc == 1) + { + npts = nr > nc ? nr : nc; + nsamples = 1; + } + else + { + npts = nr; + nsamples = nc; + } + + octave_idx_type nn = 4*npts+15; + + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + retval = FloatComplexMatrix (*this); + FloatComplex *tmp_data = retval.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + F77_FUNC (cfftb, CFFTB) (npts, &tmp_data[npts*j], pwsave); + } + + for (octave_idx_type j = 0; j < npts*nsamples; j++) + tmp_data[j] = tmp_data[j] / static_cast (npts); + + npts = nc; + nsamples = nr; + nn = 4*npts+15; + + wsave.resize (nn); + pwsave = wsave.fortran_vec (); + + Array tmp (npts); + FloatComplex *prow = tmp.fortran_vec (); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type j = 0; j < nsamples; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type i = 0; i < npts; i++) + prow[i] = tmp_data[i*nr + j]; + + F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); + + for (octave_idx_type i = 0; i < npts; i++) + tmp_data[i*nr + j] = prow[i] / static_cast (npts); + } + + return retval; +} + +#endif + +FloatDET +FloatMatrix::determinant (void) const +{ + octave_idx_type info; + float rcond; + return determinant (info, rcond, 0); +} + +FloatDET +FloatMatrix::determinant (octave_idx_type& info) const +{ + float rcond; + return determinant (info, rcond, 0); +} + +FloatDET +FloatMatrix::determinant (octave_idx_type& info, float& rcond, int calc_cond) const +{ + FloatDET retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr == 0 || nc == 0) + { + retval = FloatDET (1.0, 0); + } + else + { + Array ipvt (nr); + octave_idx_type *pipvt = ipvt.fortran_vec (); + + FloatMatrix atmp = *this; + float *tmp_data = atmp.fortran_vec (); + + info = 0; + + // Calculate the norm of the matrix, for later use. 
+ float anorm = 0; + if (calc_cond) + anorm = atmp.abs().sum().row(static_cast(0)).max(); + + F77_XFCN (sgetrf, SGETRF, (nr, nr, tmp_data, nr, pipvt, info)); + + // Throw-away extra info LAPACK gives so as to not change output. + rcond = 0.0; + if (info != 0) + { + info = -1; + retval = FloatDET (); + } + else + { + if (calc_cond) + { + // Now calc the condition number for non-singular matrix. + char job = '1'; + Array z (4 * nc); + float *pz = z.fortran_vec (); + Array iz (nc); + octave_idx_type *piz = iz.fortran_vec (); + + F77_XFCN (sgecon, SGECON, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, tmp_data, nr, anorm, + rcond, pz, piz, info + F77_CHAR_ARG_LEN (1))); + } + + if (info != 0) + { + info = -1; + retval = FloatDET (); + } + else + { + float c = 1.0; + int e = 0; + + for (octave_idx_type i = 0; i < nc; i++) + { + if (ipvt(i) != (i+1)) + c = -c; + + c *= atmp(i,i); + + if (c == 0.0) + break; + + while (fabs (c) < 0.5) + { + c *= 2.0; + e--; + } + + while (fabs (c) >= 2.0) + { + c /= 2.0; + e++; + } + } + + retval = FloatDET (c, e); + } + } + } + + return retval; +} + +FloatMatrix +FloatMatrix::utsolve (MatrixType &mattype, const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool calc_cond) const +{ + FloatMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr != b.rows ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (nr == 0 || nc == 0 || b.cols () == 0) + retval = FloatMatrix (nc, b.cols (), 0.0); + else + { + volatile int typ = mattype.type (); + + if (typ == MatrixType::Permuted_Upper || + typ == MatrixType::Upper) + { + octave_idx_type b_nc = b.cols (); + rcond = 1.; + info = 0; + + if (typ == MatrixType::Permuted_Upper) + { + (*current_liboctave_error_handler) + ("permuted triangular matrix not implemented"); + } + else + { + const float *tmp_data = fortran_vec (); + + if (calc_cond) + { + char norm = '1'; + 
char uplo = 'U'; + char dia = 'N'; + + Array z (3 * nc); + float *pz = z.fortran_vec (); + Array iz (nc); + octave_idx_type *piz = iz.fortran_vec (); + + F77_XFCN (strcon, STRCON, (F77_CONST_CHAR_ARG2 (&norm, 1), + F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&dia, 1), + nr, tmp_data, nr, rcond, + pz, piz, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (info != 0) + info = -2; + + volatile float rcond_plus_one = rcond + 1.0; + + if (rcond_plus_one == 1.0 || xisnan (rcond)) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision, rcond = %g", + rcond); + } + } + + if (info == 0) + { + retval = b; + float *result = retval.fortran_vec (); + + char uplo = 'U'; + char trans = 'N'; + char dia = 'N'; + + F77_XFCN (strtrs, STRTRS, (F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&trans, 1), + F77_CONST_CHAR_ARG2 (&dia, 1), + nr, b_nc, tmp_data, nr, + result, nr, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + } + } + } + else + (*current_liboctave_error_handler) ("incorrect matrix type"); + } + + return retval; +} + +FloatMatrix +FloatMatrix::ltsolve (MatrixType &mattype, const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool calc_cond) const +{ + FloatMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr != b.rows ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (nr == 0 || nc == 0 || b.cols () == 0) + retval = FloatMatrix (nc, b.cols (), 0.0); + else + { + volatile int typ = mattype.type (); + + if (typ == MatrixType::Permuted_Lower || + typ == MatrixType::Lower) + { + octave_idx_type b_nc = b.cols (); + rcond = 1.; + info = 0; + + if (typ == MatrixType::Permuted_Lower) + { + (*current_liboctave_error_handler) + ("permuted triangular matrix not 
implemented"); + } + else + { + const float *tmp_data = fortran_vec (); + + if (calc_cond) + { + char norm = '1'; + char uplo = 'L'; + char dia = 'N'; + + Array z (3 * nc); + float *pz = z.fortran_vec (); + Array iz (nc); + octave_idx_type *piz = iz.fortran_vec (); + + F77_XFCN (strcon, STRCON, (F77_CONST_CHAR_ARG2 (&norm, 1), + F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&dia, 1), + nr, tmp_data, nr, rcond, + pz, piz, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (info != 0) + info = -2; + + volatile float rcond_plus_one = rcond + 1.0; + + if (rcond_plus_one == 1.0 || xisnan (rcond)) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision, rcond = %g", + rcond); + } + } + + if (info == 0) + { + retval = b; + float *result = retval.fortran_vec (); + + char uplo = 'L'; + char trans = 'N'; + char dia = 'N'; + + F77_XFCN (strtrs, STRTRS, (F77_CONST_CHAR_ARG2 (&uplo, 1), + F77_CONST_CHAR_ARG2 (&trans, 1), + F77_CONST_CHAR_ARG2 (&dia, 1), + nr, b_nc, tmp_data, nr, + result, nr, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + } + } + } + else + (*current_liboctave_error_handler) ("incorrect matrix type"); + } + + return retval; +} + +FloatMatrix +FloatMatrix::fsolve (MatrixType &mattype, const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool calc_cond) const +{ + FloatMatrix retval; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr != nc || nr != b.rows ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (nr == 0 || b.cols () == 0) + retval = FloatMatrix (nc, b.cols (), 0.0); + else + { + volatile int typ = mattype.type (); + + // Calculate the norm of the matrix, for later use. 
+ float anorm = -1.; + + if (typ == MatrixType::Hermitian) + { + info = 0; + char job = 'L'; + FloatMatrix atmp = *this; + float *tmp_data = atmp.fortran_vec (); + anorm = atmp.abs().sum().row(static_cast(0)).max(); + + F77_XFCN (spotrf, SPOTRF, (F77_CONST_CHAR_ARG2 (&job, 1), nr, + tmp_data, nr, info + F77_CHAR_ARG_LEN (1))); + + // Throw-away extra info LAPACK gives so as to not change output. + rcond = 0.0; + if (info != 0) + { + info = -2; + + mattype.mark_as_unsymmetric (); + typ = MatrixType::Full; + } + else + { + if (calc_cond) + { + Array z (3 * nc); + float *pz = z.fortran_vec (); + Array iz (nc); + octave_idx_type *piz = iz.fortran_vec (); + + F77_XFCN (spocon, SPOCON, (F77_CONST_CHAR_ARG2 (&job, 1), + nr, tmp_data, nr, anorm, + rcond, pz, piz, info + F77_CHAR_ARG_LEN (1))); + + if (info != 0) + info = -2; + + volatile float rcond_plus_one = rcond + 1.0; + + if (rcond_plus_one == 1.0 || xisnan (rcond)) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision, rcond = %g", + rcond); + } + } + + if (info == 0) + { + retval = b; + float *result = retval.fortran_vec (); + + octave_idx_type b_nc = b.cols (); + + F77_XFCN (spotrs, SPOTRS, (F77_CONST_CHAR_ARG2 (&job, 1), + nr, b_nc, tmp_data, nr, + result, b.rows(), info + F77_CHAR_ARG_LEN (1))); + } + else + { + mattype.mark_as_unsymmetric (); + typ = MatrixType::Full; + } + } + } + + if (typ == MatrixType::Full) + { + info = 0; + + Array ipvt (nr); + octave_idx_type *pipvt = ipvt.fortran_vec (); + + FloatMatrix atmp = *this; + float *tmp_data = atmp.fortran_vec (); + if(anorm < 0.) + anorm = atmp.abs().sum().row(static_cast(0)).max(); + + Array z (4 * nc); + float *pz = z.fortran_vec (); + Array iz (nc); + octave_idx_type *piz = iz.fortran_vec (); + + F77_XFCN (sgetrf, SGETRF, (nr, nr, tmp_data, nr, pipvt, info)); + + // Throw-away extra info LAPACK gives so as to not change output. 
+ rcond = 0.0; + if (info != 0) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision"); + + mattype.mark_as_rectangular (); + } + else + { + if (calc_cond) + { + // Now calculate the condition number for + // non-singular matrix. + char job = '1'; + F77_XFCN (sgecon, SGECON, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, tmp_data, nr, anorm, + rcond, pz, piz, info + F77_CHAR_ARG_LEN (1))); + + if (info != 0) + info = -2; + + volatile float rcond_plus_one = rcond + 1.0; + + if (rcond_plus_one == 1.0 || xisnan (rcond)) + { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision, rcond = %g", + rcond); + } + } + + if (info == 0) + { + retval = b; + float *result = retval.fortran_vec (); + + octave_idx_type b_nc = b.cols (); + + char job = 'N'; + F77_XFCN (sgetrs, SGETRS, (F77_CONST_CHAR_ARG2 (&job, 1), + nr, b_nc, tmp_data, nr, + pipvt, result, b.rows(), info + F77_CHAR_ARG_LEN (1))); + } + else + mattype.mark_as_rectangular (); + } + } + else if (typ != MatrixType::Hermitian) + (*current_liboctave_error_handler) ("incorrect matrix type"); + } + + return retval; +} + +FloatMatrix +FloatMatrix::solve (MatrixType &typ, const FloatMatrix& b) const +{ + octave_idx_type info; + float rcond; + return solve (typ, b, info, rcond, 0); +} + +FloatMatrix +FloatMatrix::solve (MatrixType &typ, const FloatMatrix& b, octave_idx_type& info, + float& rcond) const +{ + return solve (typ, b, info, rcond, 0); +} + +FloatMatrix +FloatMatrix::solve (MatrixType &mattype, const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool singular_fallback) const +{ + FloatMatrix retval; + int typ = mattype.type (); + + if (typ == MatrixType::Unknown) + typ = mattype.type (*this); + + // Only calculate the condition number for LU/Cholesky + if (typ == MatrixType::Upper || typ == 
MatrixType::Permuted_Upper) + retval = utsolve (mattype, b, info, rcond, sing_handler, false); + else if (typ == MatrixType::Lower || typ == MatrixType::Permuted_Lower) + retval = ltsolve (mattype, b, info, rcond, sing_handler, false); + else if (typ == MatrixType::Full || typ == MatrixType::Hermitian) + retval = fsolve (mattype, b, info, rcond, sing_handler, true); + else if (typ != MatrixType::Rectangular) + { + (*current_liboctave_error_handler) ("unknown matrix type"); + return FloatMatrix (); + } + + // Rectangular or one of the above solvers flags a singular matrix + if (singular_fallback && mattype.type () == MatrixType::Rectangular) + { + octave_idx_type rank; + retval = lssolve (b, info, rank, rcond); + } + + return retval; +} + +FloatComplexMatrix +FloatMatrix::solve (MatrixType &typ, const FloatComplexMatrix& b) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (typ, b); +} + +FloatComplexMatrix +FloatMatrix::solve (MatrixType &typ, const FloatComplexMatrix& b, + octave_idx_type& info) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (typ, b, info); +} + +FloatComplexMatrix +FloatMatrix::solve (MatrixType &typ, const FloatComplexMatrix& b, octave_idx_type& info, + float& rcond) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (typ, b, info, rcond); +} + +FloatComplexMatrix +FloatMatrix::solve (MatrixType &typ, const FloatComplexMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool singular_fallback) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (typ, b, info, rcond, sing_handler, singular_fallback); +} + +FloatColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatColumnVector& b) const +{ + octave_idx_type info; float rcond; + return solve (typ, b, info, rcond); +} + +FloatColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatColumnVector& b, + octave_idx_type& info) const +{ + float rcond; + return solve (typ, b, info, rcond); +} + 
+FloatColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatColumnVector& b, octave_idx_type& info, + float& rcond) const +{ + return solve (typ, b, info, rcond, 0); +} + +FloatColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatColumnVector& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler) const +{ + FloatMatrix tmp (b); + return solve (typ, tmp, info, rcond, sing_handler).column(static_cast (0)); +} + +FloatComplexColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatComplexColumnVector& b) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (typ, b); +} + +FloatComplexColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (typ, b, info); +} + +FloatComplexColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info, float& rcond) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (typ, b, info, rcond); +} + +FloatComplexColumnVector +FloatMatrix::solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve(typ, b, info, rcond, sing_handler); +} + +FloatMatrix +FloatMatrix::solve (const FloatMatrix& b) const +{ + octave_idx_type info; + float rcond; + return solve (b, info, rcond, 0); +} + +FloatMatrix +FloatMatrix::solve (const FloatMatrix& b, octave_idx_type& info) const +{ + float rcond; + return solve (b, info, rcond, 0); +} + +FloatMatrix +FloatMatrix::solve (const FloatMatrix& b, octave_idx_type& info, float& rcond) const +{ + return solve (b, info, rcond, 0); +} + +FloatMatrix +FloatMatrix::solve (const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler) const +{ + MatrixType mattype (*this); + return solve (mattype, b, info, rcond, 
sing_handler); +} + +FloatComplexMatrix +FloatMatrix::solve (const FloatComplexMatrix& b) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b); +} + +FloatComplexMatrix +FloatMatrix::solve (const FloatComplexMatrix& b, octave_idx_type& info) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b, info); +} + +FloatComplexMatrix +FloatMatrix::solve (const FloatComplexMatrix& b, octave_idx_type& info, float& rcond) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b, info, rcond); +} + +FloatComplexMatrix +FloatMatrix::solve (const FloatComplexMatrix& b, octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b, info, rcond, sing_handler); +} + +FloatColumnVector +FloatMatrix::solve (const FloatColumnVector& b) const +{ + octave_idx_type info; float rcond; + return solve (b, info, rcond); +} + +FloatColumnVector +FloatMatrix::solve (const FloatColumnVector& b, octave_idx_type& info) const +{ + float rcond; + return solve (b, info, rcond); +} + +FloatColumnVector +FloatMatrix::solve (const FloatColumnVector& b, octave_idx_type& info, float& rcond) const +{ + return solve (b, info, rcond, 0); +} + +FloatColumnVector +FloatMatrix::solve (const FloatColumnVector& b, octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const +{ + MatrixType mattype (*this); + return solve (mattype, b, info, rcond, sing_handler); +} + +FloatComplexColumnVector +FloatMatrix::solve (const FloatComplexColumnVector& b) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b); +} + +FloatComplexColumnVector +FloatMatrix::solve (const FloatComplexColumnVector& b, octave_idx_type& info) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b, info); +} + +FloatComplexColumnVector +FloatMatrix::solve (const FloatComplexColumnVector& b, octave_idx_type& info, float& rcond) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve 
(b, info, rcond); +} + +FloatComplexColumnVector +FloatMatrix::solve (const FloatComplexColumnVector& b, octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const +{ + FloatComplexMatrix tmp (*this); + return tmp.solve (b, info, rcond, sing_handler); +} + +FloatMatrix +FloatMatrix::lssolve (const FloatMatrix& b) const +{ + octave_idx_type info; + octave_idx_type rank; + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatMatrix +FloatMatrix::lssolve (const FloatMatrix& b, octave_idx_type& info) const +{ + octave_idx_type rank; + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatMatrix +FloatMatrix::lssolve (const FloatMatrix& b, octave_idx_type& info, + octave_idx_type& rank) const +{ + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatMatrix +FloatMatrix::lssolve (const FloatMatrix& b, octave_idx_type& info, + octave_idx_type& rank, float &rcond) const +{ + FloatMatrix retval; + + octave_idx_type nrhs = b.cols (); + + octave_idx_type m = rows (); + octave_idx_type n = cols (); + + if (m != b.rows ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (m == 0 || n == 0 || b.cols () == 0) + retval = FloatMatrix (n, b.cols (), 0.0); + else + { + volatile octave_idx_type minmn = (m < n ? m : n); + octave_idx_type maxmn = m > n ? m : n; + rcond = -1.0; + if (m != n) + { + retval = FloatMatrix (maxmn, nrhs, 0.0); + + for (octave_idx_type j = 0; j < nrhs; j++) + for (octave_idx_type i = 0; i < m; i++) + retval.elem (i, j) = b.elem (i, j); + } + else + retval = b; + + FloatMatrix atmp = *this; + float *tmp_data = atmp.fortran_vec (); + + float *pretval = retval.fortran_vec (); + Array s (minmn); + float *ps = s.fortran_vec (); + + // Ask DGELSD what the dimension of WORK should be. 
+ octave_idx_type lwork = -1; + + Array work (1); + + octave_idx_type smlsiz; + F77_FUNC (xilaenv, XILAENV) (9, F77_CONST_CHAR_ARG2 ("SGELSD", 6), + F77_CONST_CHAR_ARG2 (" ", 1), + 0, 0, 0, 0, smlsiz + F77_CHAR_ARG_LEN (6) + F77_CHAR_ARG_LEN (1)); + + octave_idx_type mnthr; + F77_FUNC (xilaenv, XILAENV) (6, F77_CONST_CHAR_ARG2 ("SGELSD", 6), + F77_CONST_CHAR_ARG2 (" ", 1), + m, n, nrhs, -1, mnthr + F77_CHAR_ARG_LEN (6) + F77_CHAR_ARG_LEN (1)); + + // We compute the size of iwork because DGELSD in older versions + // of LAPACK does not return it on a query call. + float dminmn = static_cast (minmn); + float dsmlsizp1 = static_cast (smlsiz+1); +#if defined (HAVE_LOG2) + float tmp = log2 (dminmn / dsmlsizp1); +#else + float tmp = log (dminmn / dsmlsizp1) / log (2.0); +#endif + octave_idx_type nlvl = static_cast (tmp) + 1; + if (nlvl < 0) + nlvl = 0; + + octave_idx_type liwork = 3 * minmn * nlvl + 11 * minmn; + if (liwork < 1) + liwork = 1; + Array iwork (liwork); + octave_idx_type* piwork = iwork.fortran_vec (); + + F77_XFCN (sgelsd, SGELSD, (m, n, nrhs, tmp_data, m, pretval, maxmn, + ps, rcond, rank, work.fortran_vec (), + lwork, piwork, info)); + + // The workspace query is broken in at least LAPACK 3.0.0 + // through 3.1.1 when n >= mnthr. The obtuse formula below + // should provide sufficient workspace for DGELSD to operate + // efficiently. 
+ if (n >= mnthr) + { + const octave_idx_type wlalsd + = 9*m + 2*m*smlsiz + 8*m*nlvl + m*nrhs + (smlsiz+1)*(smlsiz+1); + + octave_idx_type addend = m; + + if (2*m-4 > addend) + addend = 2*m-4; + + if (nrhs > addend) + addend = nrhs; + + if (n-3*m > addend) + addend = n-3*m; + + if (wlalsd > addend) + addend = wlalsd; + + const octave_idx_type lworkaround = 4*m + m*m + addend; + + if (work(0) < lworkaround) + work(0) = lworkaround; + } + else if (m >= n) + { + octave_idx_type lworkaround + = 12*n + 2*n*smlsiz + 8*n*nlvl + n*nrhs + (smlsiz+1)*(smlsiz+1); + + if (work(0) < lworkaround) + work(0) = lworkaround; + } + + lwork = static_cast (work(0)); + work.resize (lwork); + + F77_XFCN (sgelsd, SGELSD, (m, n, nrhs, tmp_data, m, pretval, + maxmn, ps, rcond, rank, + work.fortran_vec (), lwork, + piwork, info)); + + if (rank < minmn) + (*current_liboctave_warning_handler) + ("dgelsd: rank deficient %dx%d matrix, rank = %d", m, n, rank); + if (s.elem (0) == 0.0) + rcond = 0.0; + else + rcond = s.elem (minmn - 1) / s.elem (0); + + retval.resize (n, nrhs); + } + + return retval; +} + +FloatComplexMatrix +FloatMatrix::lssolve (const FloatComplexMatrix& b) const +{ + FloatComplexMatrix tmp (*this); + octave_idx_type info; + octave_idx_type rank; + float rcond; + return tmp.lssolve (b, info, rank, rcond); +} + +FloatComplexMatrix +FloatMatrix::lssolve (const FloatComplexMatrix& b, octave_idx_type& info) const +{ + FloatComplexMatrix tmp (*this); + octave_idx_type rank; + float rcond; + return tmp.lssolve (b, info, rank, rcond); +} + +FloatComplexMatrix +FloatMatrix::lssolve (const FloatComplexMatrix& b, octave_idx_type& info, + octave_idx_type& rank) const +{ + FloatComplexMatrix tmp (*this); + float rcond; + return tmp.lssolve (b, info, rank, rcond); +} + +FloatComplexMatrix +FloatMatrix::lssolve (const FloatComplexMatrix& b, octave_idx_type& info, + octave_idx_type& rank, float& rcond) const +{ + FloatComplexMatrix tmp (*this); + return tmp.lssolve (b, info, rank, rcond); +} + 
+FloatColumnVector +FloatMatrix::lssolve (const FloatColumnVector& b) const +{ + octave_idx_type info; + octave_idx_type rank; + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatColumnVector +FloatMatrix::lssolve (const FloatColumnVector& b, octave_idx_type& info) const +{ + octave_idx_type rank; + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatColumnVector +FloatMatrix::lssolve (const FloatColumnVector& b, octave_idx_type& info, + octave_idx_type& rank) const +{ + float rcond; + return lssolve (b, info, rank, rcond); +} + +FloatColumnVector +FloatMatrix::lssolve (const FloatColumnVector& b, octave_idx_type& info, + octave_idx_type& rank, float &rcond) const +{ + FloatColumnVector retval; + + octave_idx_type nrhs = 1; + + octave_idx_type m = rows (); + octave_idx_type n = cols (); + + if (m != b.length ()) + (*current_liboctave_error_handler) + ("matrix dimension mismatch solution of linear equations"); + else if (m == 0 || n == 0) + retval = FloatColumnVector (n, 0.0); + else + { + volatile octave_idx_type minmn = (m < n ? m : n); + octave_idx_type maxmn = m > n ? m : n; + rcond = -1.0; + + if (m != n) + { + retval = FloatColumnVector (maxmn, 0.0); + + for (octave_idx_type i = 0; i < m; i++) + retval.elem (i) = b.elem (i); + } + else + retval = b; + + FloatMatrix atmp = *this; + float *tmp_data = atmp.fortran_vec (); + + float *pretval = retval.fortran_vec (); + Array s (minmn); + float *ps = s.fortran_vec (); + + // Ask DGELSD what the dimension of WORK should be. + octave_idx_type lwork = -1; + + Array work (1); + + octave_idx_type smlsiz; + F77_FUNC (xilaenv, XILAENV) (9, F77_CONST_CHAR_ARG2 ("SGELSD", 6), + F77_CONST_CHAR_ARG2 (" ", 1), + 0, 0, 0, 0, smlsiz + F77_CHAR_ARG_LEN (6) + F77_CHAR_ARG_LEN (1)); + + // We compute the size of iwork because DGELSD in older versions + // of LAPACK does not return it on a query call. 
+ float dminmn = static_cast (minmn); + float dsmlsizp1 = static_cast (smlsiz+1); +#if defined (HAVE_LOG2) + float tmp = log2 (dminmn / dsmlsizp1); +#else + float tmp = log (dminmn / dsmlsizp1) / log (2.0); +#endif + octave_idx_type nlvl = static_cast (tmp) + 1; + if (nlvl < 0) + nlvl = 0; + + octave_idx_type liwork = 3 * minmn * nlvl + 11 * minmn; + if (liwork < 1) + liwork = 1; + Array iwork (liwork); + octave_idx_type* piwork = iwork.fortran_vec (); + + F77_XFCN (sgelsd, SGELSD, (m, n, nrhs, tmp_data, m, pretval, maxmn, + ps, rcond, rank, work.fortran_vec (), + lwork, piwork, info)); + + lwork = static_cast (work(0)); + work.resize (lwork); + + F77_XFCN (sgelsd, SGELSD, (m, n, nrhs, tmp_data, m, pretval, + maxmn, ps, rcond, rank, + work.fortran_vec (), lwork, + piwork, info)); + + if (rank < minmn) + { + if (rank < minmn) + (*current_liboctave_warning_handler) + ("dgelsd: rank deficient %dx%d matrix, rank = %d", m, n, rank); + if (s.elem (0) == 0.0) + rcond = 0.0; + else + rcond = s.elem (minmn - 1) / s.elem (0); + } + + retval.resize (n, nrhs); + } + + return retval; +} + +FloatComplexColumnVector +FloatMatrix::lssolve (const FloatComplexColumnVector& b) const +{ + FloatComplexMatrix tmp (*this); + octave_idx_type info; + octave_idx_type rank; + float rcond; + return tmp.lssolve (b, info, rank, rcond); +} + +FloatComplexColumnVector +FloatMatrix::lssolve (const FloatComplexColumnVector& b, octave_idx_type& info) const +{ + FloatComplexMatrix tmp (*this); + octave_idx_type rank; + float rcond; + return tmp.lssolve (b, info, rank, rcond); +} + +FloatComplexColumnVector +FloatMatrix::lssolve (const FloatComplexColumnVector& b, octave_idx_type& info, + octave_idx_type& rank) const +{ + FloatComplexMatrix tmp (*this); + float rcond; + return tmp.lssolve (b, info, rank, rcond); +} + +FloatComplexColumnVector +FloatMatrix::lssolve (const FloatComplexColumnVector& b, octave_idx_type& info, + octave_idx_type& rank, float &rcond) const +{ + FloatComplexMatrix tmp 
(*this); + return tmp.lssolve (b, info, rank, rcond); +} + +// Constants for matrix exponential calculation. + +static float padec [] = +{ + 5.0000000000000000e-1, + 1.1666666666666667e-1, + 1.6666666666666667e-2, + 1.6025641025641026e-3, + 1.0683760683760684e-4, + 4.8562548562548563e-6, + 1.3875013875013875e-7, + 1.9270852604185938e-9, +}; + +static void +solve_singularity_warning (float rcond) +{ + (*current_liboctave_warning_handler) + ("singular matrix encountered in expm calculation, rcond = %g", + rcond); +} + +FloatMatrix +FloatMatrix::expm (void) const +{ + FloatMatrix retval; + + FloatMatrix m = *this; + + if (numel () == 1) + return FloatMatrix (1, 1, exp (m(0))); + + octave_idx_type nc = columns (); + + // Preconditioning step 1: trace normalization to reduce dynamic + // range of poles, but avoid making stable eigenvalues unstable. + + // trace shift value + volatile float trshift = 0.0; + + for (octave_idx_type i = 0; i < nc; i++) + trshift += m.elem (i, i); + + trshift /= nc; + + if (trshift > 0.0) + { + for (octave_idx_type i = 0; i < nc; i++) + m.elem (i, i) -= trshift; + } + + // Preconditioning step 2: balancing; code follows development + // in AEPBAL + + float *p_m = m.fortran_vec (); + + octave_idx_type info, ilo, ihi, ilos, ihis; + Array dpermute (nc); + Array dscale (nc); + + // permutation first + char job = 'P'; + F77_XFCN (sgebal, SGEBAL, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, p_m, nc, ilo, ihi, + dpermute.fortran_vec (), info + F77_CHAR_ARG_LEN (1))); + + // then scaling + job = 'S'; + F77_XFCN (sgebal, SGEBAL, (F77_CONST_CHAR_ARG2 (&job, 1), + nc, p_m, nc, ilos, ihis, + dscale.fortran_vec (), info + F77_CHAR_ARG_LEN (1))); + + // Preconditioning step 3: scaling. + + FloatColumnVector work(nc); + float inf_norm; + + F77_XFCN (xslange, XSLANGE, (F77_CONST_CHAR_ARG2 ("I", 1), + nc, nc, m.fortran_vec (), nc, + work.fortran_vec (), inf_norm + F77_CHAR_ARG_LEN (1))); + + octave_idx_type sqpow = static_cast (inf_norm > 0.0 + ? 
(1.0 + log (inf_norm) / log (2.0)) + : 0.0); + + // Check whether we need to square at all. + + if (sqpow < 0) + sqpow = 0; + + if (sqpow > 0) + { + if (sqpow > 1023) + sqpow = 1023; + + float scale_factor = 1.0; + for (octave_idx_type i = 0; i < sqpow; i++) + scale_factor *= 2.0; + + m = m / scale_factor; + } + + // npp, dpp: pade' approx polynomial matrices. + + FloatMatrix npp (nc, nc, 0.0); + float *pnpp = npp.fortran_vec (); + FloatMatrix dpp = npp; + float *pdpp = dpp.fortran_vec (); + + // Now powers a^8 ... a^1. + + octave_idx_type minus_one_j = -1; + for (octave_idx_type j = 7; j >= 0; j--) + { + for (octave_idx_type i = 0; i < nc; i++) + { + octave_idx_type k = i * nc + i; + pnpp[k] += padec[j]; + pdpp[k] += minus_one_j * padec[j]; + } + + npp = m * npp; + pnpp = npp.fortran_vec (); + + dpp = m * dpp; + pdpp = dpp.fortran_vec (); + + minus_one_j *= -1; + } + + // Zero power. + + dpp = -dpp; + for (octave_idx_type j = 0; j < nc; j++) + { + npp.elem (j, j) += 1.0; + dpp.elem (j, j) += 1.0; + } + + // Compute pade approximation = inverse (dpp) * npp. + + float rcond; + retval = dpp.solve (npp, info, rcond, solve_singularity_warning); + + if (info < 0) + return retval; + + // Reverse preconditioning step 3: repeated squaring. + + while (sqpow) + { + retval = retval * retval; + sqpow--; + } + + // Reverse preconditioning step 2: inverse balancing. 
+ // apply inverse scaling to computed exponential + for (octave_idx_type i = 0; i < nc; i++) + for (octave_idx_type j = 0; j < nc; j++) + retval(i,j) *= dscale(i) / dscale(j); + + OCTAVE_QUIT; + + // construct balancing permutation vector + Array iperm (nc); + for (octave_idx_type i = 0; i < nc; i++) + iperm(i) = i; // identity permutation + + // leading permutations in forward order + for (octave_idx_type i = 0; i < (ilo-1); i++) + { + octave_idx_type swapidx = static_cast (dpermute(i)) - 1; + octave_idx_type tmp = iperm(i); + iperm(i) = iperm (swapidx); + iperm(swapidx) = tmp; + } + + // construct inverse balancing permutation vector + Array invpvec (nc); + for (octave_idx_type i = 0; i < nc; i++) + invpvec(iperm(i)) = i; // Thanks to R. A. Lippert for this method + + OCTAVE_QUIT; + + FloatMatrix tmpMat = retval; + for (octave_idx_type i = 0; i < nc; i++) + for (octave_idx_type j = 0; j < nc; j++) + retval(i,j) = tmpMat(invpvec(i),invpvec(j)); + + OCTAVE_QUIT; + + for (octave_idx_type i = 0; i < nc; i++) + iperm(i) = i; // identity permutation + + // trailing permutations must be done in reverse order + for (octave_idx_type i = nc - 1; i >= ihi; i--) + { + octave_idx_type swapidx = static_cast (dpermute(i)) - 1; + octave_idx_type tmp = iperm(i); + iperm(i) = iperm(swapidx); + iperm(swapidx) = tmp; + } + + // construct inverse balancing permutation vector + for (octave_idx_type i = 0; i < nc; i++) + invpvec(iperm(i)) = i; // Thanks to R. A. Lippert for this method + + OCTAVE_QUIT; + + tmpMat = retval; + for (octave_idx_type i = 0; i < nc; i++) + for (octave_idx_type j = 0; j < nc; j++) + retval(i,j) = tmpMat(invpvec(i),invpvec(j)); + + // Reverse preconditioning step 1: fix trace normalization. 
+ + if (trshift > 0.0) + retval = expf (trshift) * retval; + + return retval; +} + +FloatMatrix& +FloatMatrix::operator += (const FloatDiagMatrix& a) +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (nr != a_nr || nc != a_nc) + { + gripe_nonconformant ("operator +=", nr, nc, a_nr, a_nc); + return *this; + } + + for (octave_idx_type i = 0; i < a.length (); i++) + elem (i, i) += a.elem (i, i); + + return *this; +} + +FloatMatrix& +FloatMatrix::operator -= (const FloatDiagMatrix& a) +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (nr != a_nr || nc != a_nc) + { + gripe_nonconformant ("operator -=", nr, nc, a_nr, a_nc); + return *this; + } + + for (octave_idx_type i = 0; i < a.length (); i++) + elem (i, i) -= a.elem (i, i); + + return *this; +} + +// unary operations + +boolMatrix +FloatMatrix::operator ! (void) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + boolMatrix b (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + b.elem (i, j) = ! elem (i, j); + + return b; +} + +// column vector by row vector -> matrix operations + +FloatMatrix +operator * (const FloatColumnVector& v, const FloatRowVector& a) +{ + FloatMatrix retval; + + octave_idx_type len = v.length (); + + if (len != 0) + { + octave_idx_type a_len = a.length (); + + retval.resize (len, a_len); + float *c = retval.fortran_vec (); + + F77_XFCN (sgemm, SGEMM, (F77_CONST_CHAR_ARG2 ("N", 1), + F77_CONST_CHAR_ARG2 ("N", 1), + len, a_len, 1, 1.0, v.data (), len, + a.data (), 1, 0.0, c, len + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + } + + return retval; +} + +// other operations. 
+ +FloatMatrix +FloatMatrix::map (dmapper fcn) const +{ + return MArray2::map (func_ptr (fcn)); +} + +FloatComplexMatrix +FloatMatrix::map (cmapper fcn) const +{ + return MArray2::map (func_ptr (fcn)); +} + +boolMatrix +FloatMatrix::map (bmapper fcn) const +{ + return MArray2::map (func_ptr (fcn)); +} + +bool +FloatMatrix::any_element_is_negative (bool neg_zero) const +{ + octave_idx_type nel = nelem (); + + if (neg_zero) + { + for (octave_idx_type i = 0; i < nel; i++) + if (lo_ieee_signbit (elem (i))) + return true; + } + else + { + for (octave_idx_type i = 0; i < nel; i++) + if (elem (i) < 0) + return true; + } + + return false; +} + + +bool +FloatMatrix::any_element_is_inf_or_nan (void) const +{ + octave_idx_type nel = nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + { + float val = elem (i); + if (xisinf (val) || xisnan (val)) + return true; + } + + return false; +} + +bool +FloatMatrix::any_element_not_one_or_zero (void) const +{ + octave_idx_type nel = nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + { + float val = elem (i); + if (val != 0 && val != 1) + return true; + } + + return false; +} + +bool +FloatMatrix::all_elements_are_int_or_inf_or_nan (void) const +{ + octave_idx_type nel = nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + { + float val = elem (i); + if (xisnan (val) || D_NINT (val) == val) + continue; + else + return false; + } + + return true; +} + +// Return nonzero if any element of M is not an integer. Also extract +// the largest and smallest values and return them in MAX_VAL and MIN_VAL. 
+ +bool +FloatMatrix::all_integers (float& max_val, float& min_val) const +{ + octave_idx_type nel = nelem (); + + if (nel > 0) + { + max_val = elem (0); + min_val = elem (0); + } + else + return false; + + for (octave_idx_type i = 0; i < nel; i++) + { + float val = elem (i); + + if (val > max_val) + max_val = val; + + if (val < min_val) + min_val = val; + + if (D_NINT (val) != val) + return false; + } + + return true; +} + +bool +FloatMatrix::too_large_for_float (void) const +{ + octave_idx_type nel = nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + { + float val = elem (i); + + if (! (xisnan (val) || xisinf (val)) + && fabs (val) > FLT_MAX) + return true; + } + + return false; +} + +// FIXME Do these really belong here? Maybe they should be +// in a base class? + +boolMatrix +FloatMatrix::all (int dim) const +{ + MX_ALL_OP (dim); +} + +boolMatrix +FloatMatrix::any (int dim) const +{ + MX_ANY_OP (dim); +} + +FloatMatrix +FloatMatrix::cumprod (int dim) const +{ + MX_CUMULATIVE_OP (FloatMatrix, float, *=); +} + +FloatMatrix +FloatMatrix::cumsum (int dim) const +{ + MX_CUMULATIVE_OP (FloatMatrix, float, +=); +} + +FloatMatrix +FloatMatrix::prod (int dim) const +{ + MX_REDUCTION_OP (FloatMatrix, *=, 1.0, 1.0); +} + +FloatMatrix +FloatMatrix::sum (int dim) const +{ + MX_REDUCTION_OP (FloatMatrix, +=, 0.0, 0.0); +} + +FloatMatrix +FloatMatrix::sumsq (int dim) const +{ +#define ROW_EXPR \ + float d = elem (i, j); \ + retval.elem (i, 0) += d * d + +#define COL_EXPR \ + float d = elem (i, j); \ + retval.elem (0, j) += d * d + + MX_BASE_REDUCTION_OP (FloatMatrix, ROW_EXPR, COL_EXPR, 0.0, 0.0); + +#undef ROW_EXPR +#undef COL_EXPR +} + +FloatMatrix +FloatMatrix::abs (void) const +{ + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + FloatMatrix retval (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + retval (i, j) = fabs (elem (i, j)); + + return retval; +} + +FloatMatrix +FloatMatrix::diag 
(octave_idx_type k) const +{ + return MArray2::diag (k); +} + +FloatColumnVector +FloatMatrix::row_min (void) const +{ + Array dummy_idx; + return row_min (dummy_idx); +} + +FloatColumnVector +FloatMatrix::row_min (Array& idx_arg) const +{ + FloatColumnVector result; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + result.resize (nr); + idx_arg.resize (nr); + + for (octave_idx_type i = 0; i < nr; i++) + { + octave_idx_type idx_j; + + float tmp_min = octave_Float_NaN; + + for (idx_j = 0; idx_j < nc; idx_j++) + { + tmp_min = elem (i, idx_j); + + if (! xisnan (tmp_min)) + break; + } + + for (octave_idx_type j = idx_j+1; j < nc; j++) + { + float tmp = elem (i, j); + + if (xisnan (tmp)) + continue; + else if (tmp < tmp_min) + { + idx_j = j; + tmp_min = tmp; + } + } + + result.elem (i) = tmp_min; + idx_arg.elem (i) = xisnan (tmp_min) ? 0 : idx_j; + } + } + + return result; +} + +FloatColumnVector +FloatMatrix::row_max (void) const +{ + Array dummy_idx; + return row_max (dummy_idx); +} + +FloatColumnVector +FloatMatrix::row_max (Array& idx_arg) const +{ + FloatColumnVector result; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + result.resize (nr); + idx_arg.resize (nr); + + for (octave_idx_type i = 0; i < nr; i++) + { + octave_idx_type idx_j; + + float tmp_max = octave_Float_NaN; + + for (idx_j = 0; idx_j < nc; idx_j++) + { + tmp_max = elem (i, idx_j); + + if (! xisnan (tmp_max)) + break; + } + + for (octave_idx_type j = idx_j+1; j < nc; j++) + { + float tmp = elem (i, j); + + if (xisnan (tmp)) + continue; + else if (tmp > tmp_max) + { + idx_j = j; + tmp_max = tmp; + } + } + + result.elem (i) = tmp_max; + idx_arg.elem (i) = xisnan (tmp_max) ? 
0 : idx_j; + } + } + + return result; +} + +FloatRowVector +FloatMatrix::column_min (void) const +{ + Array dummy_idx; + return column_min (dummy_idx); +} + +FloatRowVector +FloatMatrix::column_min (Array& idx_arg) const +{ + FloatRowVector result; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + result.resize (nc); + idx_arg.resize (nc); + + for (octave_idx_type j = 0; j < nc; j++) + { + octave_idx_type idx_i; + + float tmp_min = octave_Float_NaN; + + for (idx_i = 0; idx_i < nr; idx_i++) + { + tmp_min = elem (idx_i, j); + + if (! xisnan (tmp_min)) + break; + } + + for (octave_idx_type i = idx_i+1; i < nr; i++) + { + float tmp = elem (i, j); + + if (xisnan (tmp)) + continue; + else if (tmp < tmp_min) + { + idx_i = i; + tmp_min = tmp; + } + } + + result.elem (j) = tmp_min; + idx_arg.elem (j) = xisnan (tmp_min) ? 0 : idx_i; + } + } + + return result; +} + +FloatRowVector +FloatMatrix::column_max (void) const +{ + Array dummy_idx; + return column_max (dummy_idx); +} + +FloatRowVector +FloatMatrix::column_max (Array& idx_arg) const +{ + FloatRowVector result; + + octave_idx_type nr = rows (); + octave_idx_type nc = cols (); + + if (nr > 0 && nc > 0) + { + result.resize (nc); + idx_arg.resize (nc); + + for (octave_idx_type j = 0; j < nc; j++) + { + octave_idx_type idx_i; + + float tmp_max = octave_Float_NaN; + + for (idx_i = 0; idx_i < nr; idx_i++) + { + tmp_max = elem (idx_i, j); + + if (! xisnan (tmp_max)) + break; + } + + for (octave_idx_type i = idx_i+1; i < nr; i++) + { + float tmp = elem (i, j); + + if (xisnan (tmp)) + continue; + else if (tmp > tmp_max) + { + idx_i = i; + tmp_max = tmp; + } + } + + result.elem (j) = tmp_max; + idx_arg.elem (j) = xisnan (tmp_max) ? 
0 : idx_i; + } + } + + return result; +} + +std::ostream& +operator << (std::ostream& os, const FloatMatrix& a) +{ + for (octave_idx_type i = 0; i < a.rows (); i++) + { + for (octave_idx_type j = 0; j < a.cols (); j++) + { + os << " "; + octave_write_float (os, a.elem (i, j)); + } + os << "\n"; + } + return os; +} + +std::istream& +operator >> (std::istream& is, FloatMatrix& a) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + if (nr < 1 || nc < 1) + is.clear (std::ios::badbit); + else + { + float tmp; + for (octave_idx_type i = 0; i < nr; i++) + for (octave_idx_type j = 0; j < nc; j++) + { + tmp = octave_read_float (is); + if (is) + a.elem (i, j) = tmp; + else + goto done; + } + } + + done: + + return is; +} + +FloatMatrix +Givens (float x, float y) +{ + float cc, s, temp_r; + + F77_FUNC (slartg, SLARTG) (x, y, cc, s, temp_r); + + FloatMatrix g (2, 2); + + g.elem (0, 0) = cc; + g.elem (1, 1) = cc; + g.elem (0, 1) = s; + g.elem (1, 0) = -s; + + return g; +} + +FloatMatrix +Sylvester (const FloatMatrix& a, const FloatMatrix& b, const FloatMatrix& c) +{ + FloatMatrix retval; + + // FIXME -- need to check that a, b, and c are all the same + // size. + + // Compute Schur decompositions. + + FloatSCHUR as (a, "U"); + FloatSCHUR bs (b, "U"); + + // Transform c to new coordinates. + + FloatMatrix ua = as.unitary_matrix (); + FloatMatrix sch_a = as.schur_matrix (); + + FloatMatrix ub = bs.unitary_matrix (); + FloatMatrix sch_b = bs.schur_matrix (); + + FloatMatrix cx = ua.transpose () * c * ub; + + // Solve the sylvester equation, back-transform, and return the + // solution. 
+ + octave_idx_type a_nr = a.rows (); + octave_idx_type b_nr = b.rows (); + + float scale; + octave_idx_type info; + + float *pa = sch_a.fortran_vec (); + float *pb = sch_b.fortran_vec (); + float *px = cx.fortran_vec (); + + F77_XFCN (strsyl, STRSYL, (F77_CONST_CHAR_ARG2 ("N", 1), + F77_CONST_CHAR_ARG2 ("N", 1), + 1, a_nr, b_nr, pa, a_nr, pb, + b_nr, px, a_nr, scale, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + + // FIXME -- check info? + + retval = -ua*cx*ub.transpose (); + + return retval; +} + +// matrix by matrix -> matrix operations + +/* Simple Dot Product, Matrix-Vector and Matrix-Matrix Unit tests +%!assert([1 2 3] * [ 4 ; 5 ; 6], 32, 1e-14) +%!assert([1 2 ; 3 4 ] * [5 ; 6], [17 ; 39 ], 1e-14) +%!assert([1 2 ; 3 4 ] * [5 6 ; 7 8], [19 22; 43 50], 1e-14) +*/ + +/* Test some simple identities +%!shared M, cv, rv +%! M = randn(10,10); +%! cv = randn(10,1); +%! rv = randn(1,10); +%!assert([M*cv,M*cv],M*[cv,cv],1e-14) +%!assert([rv*M;rv*M],[rv;rv]*M,1e-14) +%!assert(2*rv*cv,[rv,rv]*[cv;cv],1e-14) +*/ + + +FloatMatrix +operator * (const FloatMatrix& m, const FloatMatrix& a) +{ + FloatMatrix retval; + + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.cols (); + + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (nc != a_nr) + gripe_nonconformant ("operator *", nr, nc, a_nr, a_nc); + else + { + if (nr == 0 || nc == 0 || a_nc == 0) + retval.resize (nr, a_nc, 0.0); + else + { + octave_idx_type ld = nr; + octave_idx_type lda = a_nr; + + retval.resize (nr, a_nc); + float *c = retval.fortran_vec (); + + if (a_nc == 1) + { + if (nr == 1) + F77_FUNC (xsdot, XSDOT) (nc, m.data (), 1, a.data (), 1, *c); + else + { + F77_XFCN (sgemv, SGEMV, (F77_CONST_CHAR_ARG2 ("N", 1), + nr, nc, 1.0, m.data (), ld, + a.data (), 1, 0.0, c, 1 + F77_CHAR_ARG_LEN (1))); + } + } + else + { + F77_XFCN (sgemm, SGEMM, (F77_CONST_CHAR_ARG2 ("N", 1), + F77_CONST_CHAR_ARG2 ("N", 1), + nr, a_nc, nc, 1.0, m.data (), + ld, a.data (), lda, 0.0, c, nr + 
F77_CHAR_ARG_LEN (1)
+				   F77_CHAR_ARG_LEN (1)));
+	    }
+	}
+    }
+
+  return retval;
+}
+
+// FIXME -- it would be nice to share code among the min/max
+// functions below.
+
+// Early-return helper for the min/max functions: an empty operand
+// yields an empty result of the same dimensions.  Relies on nr/nc
+// being in scope at the expansion site.
+#define EMPTY_RETURN_CHECK(T) \
+  if (nr == 0 || nc == 0) \
+    return T (nr, nc);
+
+// Element-wise minimum of scalar D and each element of M.
+FloatMatrix
+min (float d, const FloatMatrix& m)
+{
+  octave_idx_type nr = m.rows ();
+  octave_idx_type nc = m.columns ();
+
+  EMPTY_RETURN_CHECK (FloatMatrix);
+
+  FloatMatrix result (nr, nc);
+
+  for (octave_idx_type j = 0; j < nc; j++)
+    for (octave_idx_type i = 0; i < nr; i++)
+      {
+	OCTAVE_QUIT;	// allow interrupts during long loops
+	result (i, j) = xmin (d, m (i, j));
+      }
+
+  return result;
+}
+
+// Element-wise minimum of each element of M and scalar D.
+FloatMatrix
+min (const FloatMatrix& m, float d)
+{
+  octave_idx_type nr = m.rows ();
+  octave_idx_type nc = m.columns ();
+
+  EMPTY_RETURN_CHECK (FloatMatrix);
+
+  FloatMatrix result (nr, nc);
+
+  for (octave_idx_type j = 0; j < nc; j++)
+    for (octave_idx_type i = 0; i < nr; i++)
+      {
+	OCTAVE_QUIT;
+	result (i, j) = xmin (m (i, j), d);
+      }
+
+  return result;
+}
+
+// Element-wise minimum of two conformant matrices; reports an error
+// and returns an empty matrix on a size mismatch.
+FloatMatrix
+min (const FloatMatrix& a, const FloatMatrix& b)
+{
+  octave_idx_type nr = a.rows ();
+  octave_idx_type nc = a.columns ();
+
+  if (nr != b.rows () || nc != b.columns ())
+    {
+      (*current_liboctave_error_handler)
+	("two-arg min expecting args of same size");
+      return FloatMatrix ();
+    }
+
+  EMPTY_RETURN_CHECK (FloatMatrix);
+
+  FloatMatrix result (nr, nc);
+
+  for (octave_idx_type j = 0; j < nc; j++)
+    for (octave_idx_type i = 0; i < nr; i++)
+      {
+	OCTAVE_QUIT;
+	result (i, j) = xmin (a (i, j), b (i, j));
+      }
+
+  return result;
+}
+
+// Element-wise maximum of scalar D and each element of M.
+FloatMatrix
+max (float d, const FloatMatrix& m)
+{
+  octave_idx_type nr = m.rows ();
+  octave_idx_type nc = m.columns ();
+
+  EMPTY_RETURN_CHECK (FloatMatrix);
+
+  FloatMatrix result (nr, nc);
+
+  for (octave_idx_type j = 0; j < nc; j++)
+    for (octave_idx_type i = 0; i < nr; i++)
+      {
+	OCTAVE_QUIT;
+	result (i, j) = xmax (d, m (i, j));
+      }
+
+  return result;
+}
+
+// Element-wise maximum of each element of M and scalar D.
+FloatMatrix
+max (const FloatMatrix& m, float d)
+{
+  octave_idx_type nr = m.rows ();
+
octave_idx_type nc = m.columns (); + + EMPTY_RETURN_CHECK (FloatMatrix); + + FloatMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmax (m (i, j), d); + } + + return result; +} + +FloatMatrix +max (const FloatMatrix& a, const FloatMatrix& b) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.columns (); + + if (nr != b.rows () || nc != b.columns ()) + { + (*current_liboctave_error_handler) + ("two-arg max expecting args of same size"); + return FloatMatrix (); + } + + EMPTY_RETURN_CHECK (FloatMatrix); + + FloatMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = xmax (a (i, j), b (i, j)); + } + + return result; +} + +MS_CMP_OPS(FloatMatrix, , float, ) +MS_BOOL_OPS(FloatMatrix, float, 0.0) + +SM_CMP_OPS(float, , FloatMatrix, ) +SM_BOOL_OPS(float, FloatMatrix, 0.0) + +MM_CMP_OPS(FloatMatrix, , FloatMatrix, ) +MM_BOOL_OPS(FloatMatrix, FloatMatrix, 0.0) + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fMatrix.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fMatrix.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,369 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2003, + 2004, 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_FloatMatrix_int_h) +#define octave_FloatMatrix_int_h 1 + +#include "MArray2.h" +#include "MDiagArray2.h" +#include "MatrixType.h" + +#include "mx-defs.h" +#include "mx-op-defs.h" + +class +OCTAVE_API +FloatMatrix : public MArray2 +{ +public: + + typedef void (*solve_singularity_handler) (float rcond); + + FloatMatrix (void) : MArray2 () { } + + FloatMatrix (octave_idx_type r, octave_idx_type c) : MArray2 (r, c) { } + + FloatMatrix (octave_idx_type r, octave_idx_type c, float val) : MArray2 (r, c, val) { } + + FloatMatrix (const dim_vector& dv) : MArray2 (dv) { } + + FloatMatrix (const dim_vector& dv, float val) : MArray2 (dv, val) { } + + FloatMatrix (const FloatMatrix& a) : MArray2 (a) { } + + template + FloatMatrix (const MArray2& a) : MArray2 (a) { } + + template + FloatMatrix (const Array2& a) : MArray2 (a) { } + + explicit FloatMatrix (const FloatRowVector& rv); + + explicit FloatMatrix (const FloatColumnVector& cv); + + explicit FloatMatrix (const FloatDiagMatrix& a); + + explicit FloatMatrix (const boolMatrix& a); + + explicit FloatMatrix (const charMatrix& a); + + FloatMatrix& operator = (const FloatMatrix& a) + { + MArray2::operator = (a); + return *this; + } + + bool operator == (const FloatMatrix& a) const; + bool operator != (const FloatMatrix& a) const; + + bool is_symmetric (void) const; + + // destructive insert/delete/reorder operations + + FloatMatrix& insert (const FloatMatrix& a, octave_idx_type r, octave_idx_type c); + FloatMatrix& insert (const FloatRowVector& a, octave_idx_type r, octave_idx_type c); + FloatMatrix& insert (const FloatColumnVector& a, octave_idx_type r, octave_idx_type c); + FloatMatrix& insert (const FloatDiagMatrix& a, octave_idx_type r, octave_idx_type c); + + FloatMatrix& fill (float val); + FloatMatrix& fill (float val, octave_idx_type r1, octave_idx_type c1, 
octave_idx_type r2, octave_idx_type c2); + + FloatMatrix append (const FloatMatrix& a) const; + FloatMatrix append (const FloatRowVector& a) const; + FloatMatrix append (const FloatColumnVector& a) const; + FloatMatrix append (const FloatDiagMatrix& a) const; + + FloatMatrix stack (const FloatMatrix& a) const; + FloatMatrix stack (const FloatRowVector& a) const; + FloatMatrix stack (const FloatColumnVector& a) const; + FloatMatrix stack (const FloatDiagMatrix& a) const; + + friend OCTAVE_API FloatMatrix real (const FloatComplexMatrix& a); + friend OCTAVE_API FloatMatrix imag (const FloatComplexMatrix& a); + + FloatMatrix transpose (void) const { return MArray2::transpose (); } + + // resize is the destructive equivalent for this one + + FloatMatrix extract (octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) const; + + FloatMatrix extract_n (octave_idx_type r1, octave_idx_type c1, octave_idx_type nr, octave_idx_type nc) const; + + // extract row or column i. + + FloatRowVector row (octave_idx_type i) const; + + FloatColumnVector column (octave_idx_type i) const; + +private: + FloatMatrix tinverse (MatrixType &mattype, octave_idx_type& info, float& rcond, + int force, int calc_cond) const; + + FloatMatrix finverse (MatrixType &mattype, octave_idx_type& info, float& rcond, + int force, int calc_cond) const; + +public: + FloatMatrix inverse (void) const; + FloatMatrix inverse (octave_idx_type& info) const; + FloatMatrix inverse (octave_idx_type& info, float& rcond, int force = 0, + int calc_cond = 1) const; + + FloatMatrix inverse (MatrixType &mattype) const; + FloatMatrix inverse (MatrixType &mattype, octave_idx_type& info) const; + FloatMatrix inverse (MatrixType &mattype, octave_idx_type& info, float& rcond, + int force = 0, int calc_cond = 1) const; + + FloatMatrix pseudo_inverse (float tol = 0.0) const; + + FloatComplexMatrix fourier (void) const; + FloatComplexMatrix ifourier (void) const; + + FloatComplexMatrix fourier2d (void) 
const; + FloatComplexMatrix ifourier2d (void) const; + + FloatDET determinant (void) const; + FloatDET determinant (octave_idx_type& info) const; + FloatDET determinant (octave_idx_type& info, float& rcond, int calc_cond = 1) const; + +private: + // Upper triangular matrix solvers + FloatMatrix utsolve (MatrixType &typ, const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool calc_cond = false) const; + + // Lower triangular matrix solvers + FloatMatrix ltsolve (MatrixType &typ, const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool calc_cond = false) const; + + // Full matrix solvers (lu/cholesky) + FloatMatrix fsolve (MatrixType &typ, const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool calc_cond = false) const; + +public: + // Generic interface to solver with no probing of type + FloatMatrix solve (MatrixType &typ, const FloatMatrix& b) const; + FloatMatrix solve (MatrixType &typ, const FloatMatrix& b, octave_idx_type& info) const; + FloatMatrix solve (MatrixType &typ, const FloatMatrix& b, octave_idx_type& info, + float& rcond) const; + FloatMatrix solve (MatrixType &typ, const FloatMatrix& b, octave_idx_type& info, + float& rcond, solve_singularity_handler sing_handler, + bool singular_fallback = true) const; + + FloatComplexMatrix solve (MatrixType &typ, const FloatComplexMatrix& b) const; + FloatComplexMatrix solve (MatrixType &typ, const FloatComplexMatrix& b, + octave_idx_type& info) const; + FloatComplexMatrix solve (MatrixType &typ, const FloatComplexMatrix& b, + octave_idx_type& info, float& rcond) const; + FloatComplexMatrix solve (MatrixType &typ, const FloatComplexMatrix& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler, + bool singular_fallback = true) const; + + FloatColumnVector solve (MatrixType &typ, const FloatColumnVector& b) const; + FloatColumnVector 
solve (MatrixType &typ, const FloatColumnVector& b, + octave_idx_type& info) const; + FloatColumnVector solve (MatrixType &typ, const FloatColumnVector& b, + octave_idx_type& info, float& rcond) const; + FloatColumnVector solve (MatrixType &typ, const FloatColumnVector& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const; + + FloatComplexColumnVector solve (MatrixType &typ, + const FloatComplexColumnVector& b) const; + FloatComplexColumnVector solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info) const; + FloatComplexColumnVector solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info, float& rcond) const; + FloatComplexColumnVector solve (MatrixType &typ, const FloatComplexColumnVector& b, + octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const; + + // Generic interface to solver with probing of type + FloatMatrix solve (const FloatMatrix& b) const; + FloatMatrix solve (const FloatMatrix& b, octave_idx_type& info) const; + FloatMatrix solve (const FloatMatrix& b, octave_idx_type& info, float& rcond) const; + FloatMatrix solve (const FloatMatrix& b, octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const; + + FloatComplexMatrix solve (const FloatComplexMatrix& b) const; + FloatComplexMatrix solve (const FloatComplexMatrix& b, octave_idx_type& info) const; + FloatComplexMatrix solve (const FloatComplexMatrix& b, octave_idx_type& info, float& rcond) const; + FloatComplexMatrix solve (const FloatComplexMatrix& b, octave_idx_type& info, float& rcond, + solve_singularity_handler sing_handler) const; + + FloatColumnVector solve (const FloatColumnVector& b) const; + FloatColumnVector solve (const FloatColumnVector& b, octave_idx_type& info) const; + FloatColumnVector solve (const FloatColumnVector& b, octave_idx_type& info, float& rcond) const; + FloatColumnVector solve (const FloatColumnVector& b, octave_idx_type& 
info, float& rcond, + solve_singularity_handler sing_handler) const; + + FloatComplexColumnVector solve (const FloatComplexColumnVector& b) const; + FloatComplexColumnVector solve (const FloatComplexColumnVector& b, octave_idx_type& info) const; + FloatComplexColumnVector solve (const FloatComplexColumnVector& b, octave_idx_type& info, + float& rcond) const; + FloatComplexColumnVector solve (const FloatComplexColumnVector& b, octave_idx_type& info, + float& rcond, + solve_singularity_handler sing_handler) const; + + // Singular solvers + FloatMatrix lssolve (const FloatMatrix& b) const; + FloatMatrix lssolve (const FloatMatrix& b, octave_idx_type& info) const; + FloatMatrix lssolve (const FloatMatrix& b, octave_idx_type& info, + octave_idx_type& rank) const; + FloatMatrix lssolve (const FloatMatrix& b, octave_idx_type& info, + octave_idx_type& rank, float& rcond) const; + + FloatComplexMatrix lssolve (const FloatComplexMatrix& b) const; + FloatComplexMatrix lssolve (const FloatComplexMatrix& b, octave_idx_type& info) const; + FloatComplexMatrix lssolve (const FloatComplexMatrix& b, octave_idx_type& info, + octave_idx_type& rank) const; + FloatComplexMatrix lssolve (const FloatComplexMatrix& b, octave_idx_type& info, + octave_idx_type& rank, float &rcond) const; + + FloatColumnVector lssolve (const FloatColumnVector& b) const; + FloatColumnVector lssolve (const FloatColumnVector& b, octave_idx_type& info) const; + FloatColumnVector lssolve (const FloatColumnVector& b, octave_idx_type& info, + octave_idx_type& rank) const; + FloatColumnVector lssolve (const FloatColumnVector& b, octave_idx_type& info, + octave_idx_type& rank, float& rcond) const; + + FloatComplexColumnVector lssolve (const FloatComplexColumnVector& b) const; + FloatComplexColumnVector lssolve (const FloatComplexColumnVector& b, + octave_idx_type& info) const; + FloatComplexColumnVector lssolve (const FloatComplexColumnVector& b, + octave_idx_type& info, + octave_idx_type& rank) const; + 
FloatComplexColumnVector lssolve (const FloatComplexColumnVector& b, + octave_idx_type& info, + octave_idx_type& rank, float& rcond) const; + + FloatMatrix expm (void) const; + + FloatMatrix& operator += (const FloatDiagMatrix& a); + FloatMatrix& operator -= (const FloatDiagMatrix& a); + + // unary operations + + boolMatrix operator ! (void) const; + + // other operations + + typedef float (*dmapper) (float); + typedef FloatComplex (*cmapper) (const FloatComplex&); + typedef bool (*bmapper) (float); + + FloatMatrix map (dmapper fcn) const; + FloatComplexMatrix map (cmapper fcn) const; + boolMatrix map (bmapper fcn) const; + + bool any_element_is_negative (bool = false) const; + bool any_element_is_inf_or_nan (void) const; + bool any_element_not_one_or_zero (void) const; + bool all_elements_are_int_or_inf_or_nan (void) const; + bool all_integers (float& max_val, float& min_val) const; + bool too_large_for_float (void) const; + + boolMatrix all (int dim = -1) const; + boolMatrix any (int dim = -1) const; + + FloatMatrix cumprod (int dim = -1) const; + FloatMatrix cumsum (int dim = -1) const; + FloatMatrix prod (int dim = -1) const; + FloatMatrix sum (int dim = -1) const; + FloatMatrix sumsq (int dim = -1) const; + FloatMatrix abs (void) const; + + FloatMatrix diag (octave_idx_type k = 0) const; + + FloatColumnVector row_min (void) const; + FloatColumnVector row_max (void) const; + + FloatColumnVector row_min (Array& index) const; + FloatColumnVector row_max (Array& index) const; + + FloatRowVector column_min (void) const; + FloatRowVector column_max (void) const; + + FloatRowVector column_min (Array& index) const; + FloatRowVector column_max (Array& index) const; + + // i/o + + friend OCTAVE_API std::ostream& operator << (std::ostream& os, const FloatMatrix& a); + friend OCTAVE_API std::istream& operator >> (std::istream& is, FloatMatrix& a); + + static float resize_fill_value (void) { return 0; } + +private: + + FloatMatrix (float *d, octave_idx_type r, 
octave_idx_type c) : MArray2 (d, r, c) { } +}; + +// Publish externally used friend functions. + +extern OCTAVE_API FloatMatrix real (const FloatComplexMatrix& a); +extern OCTAVE_API FloatMatrix imag (const FloatComplexMatrix& a); + +// column vector by row vector -> matrix operations + +extern OCTAVE_API FloatMatrix operator * (const FloatColumnVector& a, const FloatRowVector& b); + +// Other functions. + +extern OCTAVE_API FloatMatrix Givens (float, float); + +extern OCTAVE_API FloatMatrix Sylvester (const FloatMatrix&, const FloatMatrix&, const FloatMatrix&); + +extern OCTAVE_API FloatMatrix operator * (const FloatMatrix& a, const FloatMatrix& b); + +extern OCTAVE_API FloatMatrix min (float d, const FloatMatrix& m); +extern OCTAVE_API FloatMatrix min (const FloatMatrix& m, float d); +extern OCTAVE_API FloatMatrix min (const FloatMatrix& a, const FloatMatrix& b); + +extern OCTAVE_API FloatMatrix max (float d, const FloatMatrix& m); +extern OCTAVE_API FloatMatrix max (const FloatMatrix& m, float d); +extern OCTAVE_API FloatMatrix max (const FloatMatrix& a, const FloatMatrix& b); + +MS_CMP_OP_DECLS (FloatMatrix, float, OCTAVE_API) +MS_BOOL_OP_DECLS (FloatMatrix, float, OCTAVE_API) + +SM_CMP_OP_DECLS (float, FloatMatrix, OCTAVE_API) +SM_BOOL_OP_DECLS (float, FloatMatrix, OCTAVE_API) + +MM_CMP_OP_DECLS (FloatMatrix, FloatMatrix, OCTAVE_API) +MM_BOOL_OP_DECLS (FloatMatrix, FloatMatrix, OCTAVE_API) + +MARRAY_FORWARD_DEFS (MArray2, FloatMatrix, float) + +template +void read_int (std::istream& is, bool swap_bytes, T& val); + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fNDArray.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fNDArray.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,1182 @@ +// N-D Array manipulations. +/* + +Copyright (C) 1996, 1997, 2003, 2004, 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. 
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include + +#include "Array-util.h" +#include "fNDArray.h" +#include "functor.h" +#include "mx-base.h" +#include "f77-fcn.h" +#include "lo-error.h" +#include "lo-ieee.h" +#include "lo-mappers.h" + +#if defined (HAVE_FFTW3) +#include "oct-fftw.h" + +FloatComplexNDArray +FloatNDArray::fourier (int dim) const +{ + dim_vector dv = dims (); + + if (dim > dv.length () || dim < 0) + return FloatComplexNDArray (); + + octave_idx_type stride = 1; + octave_idx_type n = dv(dim); + + for (int i = 0; i < dim; i++) + stride *= dv(i); + + octave_idx_type howmany = numel () / dv (dim); + howmany = (stride == 1 ? howmany : (howmany > stride ? stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 1 : numel () / dv (dim) / stride); + octave_idx_type dist = (stride == 1 ? 
n : 1);
+
+  const float *in (fortran_vec ());
+  FloatComplexNDArray retval (dv);
+  FloatComplex *out (retval.fortran_vec ());
+
+  // Need to be careful here about the distance between fft's
+  for (octave_idx_type k = 0; k < nloop; k++)
+    octave_fftw::fft (in + k * stride * n, out + k * stride * n,
+		      n, howmany, stride, dist);
+
+  return retval;
+}
+
+// Inverse FFT along dimension DIM using FFTW; returns an empty array
+// for an out-of-range DIM.  The transform is done in place on a
+// complex copy of *this, batched the same way as fourier() above.
+FloatComplexNDArray
+FloatNDArray::ifourier (int dim) const
+{
+  dim_vector dv = dims ();
+
+  if (dim > dv.length () || dim < 0)
+    return FloatComplexNDArray ();
+
+  octave_idx_type stride = 1;
+  octave_idx_type n = dv(dim);
+
+  // stride = product of the dimensions preceding DIM (column-major).
+  for (int i = 0; i < dim; i++)
+    stride *= dv(i);
+
+  octave_idx_type howmany = numel () / dv (dim);
+  howmany = (stride == 1 ? howmany : (howmany > stride ? stride : howmany));
+  octave_idx_type nloop = (stride == 1 ? 1 : numel () / dv (dim) / stride);
+  octave_idx_type dist = (stride == 1 ? n : 1);
+
+  FloatComplexNDArray retval (*this);
+  FloatComplex *out (retval.fortran_vec ());
+
+  // Need to be careful here about the distance between fft's
+  for (octave_idx_type k = 0; k < nloop; k++)
+    octave_fftw::ifft (out + k * stride * n, out + k * stride * n,
+		       n, howmany, stride, dist);
+
+  return retval;
+}
+
+// 2-D FFT over the first two dimensions, applied independently to
+// each trailing "page"; empty result if there are fewer than 2 dims.
+FloatComplexNDArray
+FloatNDArray::fourier2d (void) const
+{
+  dim_vector dv = dims();
+  if (dv.length () < 2)
+    return FloatComplexNDArray ();
+
+  dim_vector dv2(dv(0), dv(1));
+  const float *in = fortran_vec ();
+  FloatComplexNDArray retval (dv);
+  FloatComplex *out = retval.fortran_vec ();
+  octave_idx_type howmany = numel() / dv(0) / dv(1);	// number of pages
+  octave_idx_type dist = dv(0) * dv(1);			// elements per page
+
+  for (octave_idx_type i=0; i < howmany; i++)
+    octave_fftw::fftNd (in + i*dist, out + i*dist, 2, dv2);
+
+  return retval;
+}
+
+// 2-D inverse FFT over the first two dimensions, per page, done in
+// place on a complex copy of *this (continues below this chunk).
+FloatComplexNDArray
+FloatNDArray::ifourier2d (void) const
+{
+  dim_vector dv = dims();
+  if (dv.length () < 2)
+    return FloatComplexNDArray ();
+
+  dim_vector dv2(dv(0), dv(1));
+  FloatComplexNDArray retval (*this);
+  FloatComplex *out = retval.fortran_vec ();
+  octave_idx_type
howmany = numel() / dv(0) / dv(1); + octave_idx_type dist = dv(0) * dv(1); + + for (octave_idx_type i=0; i < howmany; i++) + octave_fftw::ifftNd (out + i*dist, out + i*dist, 2, dv2); + + return retval; +} + +FloatComplexNDArray +FloatNDArray::fourierNd (void) const +{ + dim_vector dv = dims (); + int rank = dv.length (); + + const float *in (fortran_vec ()); + FloatComplexNDArray retval (dv); + FloatComplex *out (retval.fortran_vec ()); + + octave_fftw::fftNd (in, out, rank, dv); + + return retval; +} + +FloatComplexNDArray +FloatNDArray::ifourierNd (void) const +{ + dim_vector dv = dims (); + int rank = dv.length (); + + FloatComplexNDArray tmp (*this); + FloatComplex *in (tmp.fortran_vec ()); + FloatComplexNDArray retval (dv); + FloatComplex *out (retval.fortran_vec ()); + + octave_fftw::ifftNd (in, out, rank, dv); + + return retval; +} + +#else + +extern "C" +{ + // Note that the original complex fft routines were not written for + // float complex arguments. They have been modified by adding an + // implicit float precision (a-h,o-z) statement at the beginning of + // each subroutine. + + F77_RET_T + F77_FUNC (cffti, CFFTI) (const octave_idx_type&, FloatComplex*); + + F77_RET_T + F77_FUNC (cfftf, CFFTF) (const octave_idx_type&, FloatComplex*, FloatComplex*); + + F77_RET_T + F77_FUNC (cfftb, CFFTB) (const octave_idx_type&, FloatComplex*, FloatComplex*); +} + +FloatComplexNDArray +FloatNDArray::fourier (int dim) const +{ + dim_vector dv = dims (); + + if (dim > dv.length () || dim < 0) + return FloatComplexNDArray (); + + FloatComplexNDArray retval (dv); + octave_idx_type npts = dv(dim); + octave_idx_type nn = 4*npts+15; + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + OCTAVE_LOCAL_BUFFER (FloatComplex, tmp, npts); + + octave_idx_type stride = 1; + + for (int i = 0; i < dim; i++) + stride *= dv(i); + + octave_idx_type howmany = numel () / npts; + howmany = (stride == 1 ? howmany : (howmany > stride ? 
stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); + octave_idx_type dist = (stride == 1 ? npts : 1); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type k = 0; k < nloop; k++) + { + for (octave_idx_type j = 0; j < howmany; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type i = 0; i < npts; i++) + tmp[i] = elem((i + k*npts)*stride + j*dist); + + F77_FUNC (cfftf, CFFTF) (npts, tmp, pwsave); + + for (octave_idx_type i = 0; i < npts; i++) + retval ((i + k*npts)*stride + j*dist) = tmp[i]; + } + } + + return retval; +} + +FloatComplexNDArray +FloatNDArray::ifourier (int dim) const +{ + dim_vector dv = dims (); + + if (dim > dv.length () || dim < 0) + return FloatComplexNDArray (); + + FloatComplexNDArray retval (dv); + octave_idx_type npts = dv(dim); + octave_idx_type nn = 4*npts+15; + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + + OCTAVE_LOCAL_BUFFER (FloatComplex, tmp, npts); + + octave_idx_type stride = 1; + + for (int i = 0; i < dim; i++) + stride *= dv(i); + + octave_idx_type howmany = numel () / npts; + howmany = (stride == 1 ? howmany : (howmany > stride ? stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); + octave_idx_type dist = (stride == 1 ? 
npts : 1); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type k = 0; k < nloop; k++) + { + for (octave_idx_type j = 0; j < howmany; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type i = 0; i < npts; i++) + tmp[i] = elem((i + k*npts)*stride + j*dist); + + F77_FUNC (cfftb, CFFTB) (npts, tmp, pwsave); + + for (octave_idx_type i = 0; i < npts; i++) + retval ((i + k*npts)*stride + j*dist) = tmp[i] / + static_cast (npts); + } + } + + return retval; +} + +FloatComplexNDArray +FloatNDArray::fourier2d (void) const +{ + dim_vector dv = dims(); + dim_vector dv2 (dv(0), dv(1)); + int rank = 2; + FloatComplexNDArray retval (*this); + octave_idx_type stride = 1; + + for (int i = 0; i < rank; i++) + { + octave_idx_type npts = dv2(i); + octave_idx_type nn = 4*npts+15; + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + Array row (npts); + FloatComplex *prow = row.fortran_vec (); + + octave_idx_type howmany = numel () / npts; + howmany = (stride == 1 ? howmany : + (howmany > stride ? stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); + octave_idx_type dist = (stride == 1 ? 
npts : 1); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type k = 0; k < nloop; k++) + { + for (octave_idx_type j = 0; j < howmany; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type l = 0; l < npts; l++) + prow[l] = retval ((l + k*npts)*stride + j*dist); + + F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); + + for (octave_idx_type l = 0; l < npts; l++) + retval ((l + k*npts)*stride + j*dist) = prow[l]; + } + } + + stride *= dv2(i); + } + + return retval; +} + +FloatComplexNDArray +FloatNDArray::ifourier2d (void) const +{ + dim_vector dv = dims(); + dim_vector dv2 (dv(0), dv(1)); + int rank = 2; + FloatComplexNDArray retval (*this); + octave_idx_type stride = 1; + + for (int i = 0; i < rank; i++) + { + octave_idx_type npts = dv2(i); + octave_idx_type nn = 4*npts+15; + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + Array row (npts); + FloatComplex *prow = row.fortran_vec (); + + octave_idx_type howmany = numel () / npts; + howmany = (stride == 1 ? howmany : + (howmany > stride ? stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); + octave_idx_type dist = (stride == 1 ? 
npts : 1); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type k = 0; k < nloop; k++) + { + for (octave_idx_type j = 0; j < howmany; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type l = 0; l < npts; l++) + prow[l] = retval ((l + k*npts)*stride + j*dist); + + F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); + + for (octave_idx_type l = 0; l < npts; l++) + retval ((l + k*npts)*stride + j*dist) = prow[l] / + static_cast (npts); + } + } + + stride *= dv2(i); + } + + return retval; +} + +FloatComplexNDArray +FloatNDArray::fourierNd (void) const +{ + dim_vector dv = dims (); + int rank = dv.length (); + FloatComplexNDArray retval (*this); + octave_idx_type stride = 1; + + for (int i = 0; i < rank; i++) + { + octave_idx_type npts = dv(i); + octave_idx_type nn = 4*npts+15; + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + Array row (npts); + FloatComplex *prow = row.fortran_vec (); + + octave_idx_type howmany = numel () / npts; + howmany = (stride == 1 ? howmany : + (howmany > stride ? stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); + octave_idx_type dist = (stride == 1 ? 
npts : 1); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type k = 0; k < nloop; k++) + { + for (octave_idx_type j = 0; j < howmany; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type l = 0; l < npts; l++) + prow[l] = retval ((l + k*npts)*stride + j*dist); + + F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); + + for (octave_idx_type l = 0; l < npts; l++) + retval ((l + k*npts)*stride + j*dist) = prow[l]; + } + } + + stride *= dv(i); + } + + return retval; +} + +FloatComplexNDArray +FloatNDArray::ifourierNd (void) const +{ + dim_vector dv = dims (); + int rank = dv.length (); + FloatComplexNDArray retval (*this); + octave_idx_type stride = 1; + + for (int i = 0; i < rank; i++) + { + octave_idx_type npts = dv(i); + octave_idx_type nn = 4*npts+15; + Array wsave (nn); + FloatComplex *pwsave = wsave.fortran_vec (); + Array row (npts); + FloatComplex *prow = row.fortran_vec (); + + octave_idx_type howmany = numel () / npts; + howmany = (stride == 1 ? howmany : + (howmany > stride ? stride : howmany)); + octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); + octave_idx_type dist = (stride == 1 ? npts : 1); + + F77_FUNC (cffti, CFFTI) (npts, pwsave); + + for (octave_idx_type k = 0; k < nloop; k++) + { + for (octave_idx_type j = 0; j < howmany; j++) + { + OCTAVE_QUIT; + + for (octave_idx_type l = 0; l < npts; l++) + prow[l] = retval ((l + k*npts)*stride + j*dist); + + F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); + + for (octave_idx_type l = 0; l < npts; l++) + retval ((l + k*npts)*stride + j*dist) = prow[l] / + static_cast (npts); + } + } + + stride *= dv(i); + } + + return retval; +} + +#endif + +// unary operations + +boolNDArray +FloatNDArray::operator ! (void) const +{ + boolNDArray b (dims ()); + + for (octave_idx_type i = 0; i < length (); i++) + b.elem (i) = ! 
elem (i);
+
+  return b;
+}
+
+// Return true if any element is negative.  When NEG_ZERO is true,
+// -0.0 also counts as negative (detected via the IEEE sign bit).
+bool
+FloatNDArray::any_element_is_negative (bool neg_zero) const
+{
+  octave_idx_type nel = nelem ();
+
+  if (neg_zero)
+    {
+      for (octave_idx_type i = 0; i < nel; i++)
+	if (lo_ieee_signbit (elem (i)))
+	  return true;
+    }
+  else
+    {
+      for (octave_idx_type i = 0; i < nel; i++)
+	if (elem (i) < 0)
+	  return true;
+    }
+
+  return false;
+}
+
+
+// Return true if any element is Inf or NaN.
+bool
+FloatNDArray::any_element_is_inf_or_nan (void) const
+{
+  octave_idx_type nel = nelem ();
+
+  for (octave_idx_type i = 0; i < nel; i++)
+    {
+      float val = elem (i);
+      if (xisinf (val) || xisnan (val))
+	return true;
+    }
+
+  return false;
+}
+
+// Return true unless every element is exactly 0 or 1.  A NaN element
+// compares unequal to both, so it makes this return true.
+bool
+FloatNDArray::any_element_not_one_or_zero (void) const
+{
+  octave_idx_type nel = nelem ();
+
+  for (octave_idx_type i = 0; i < nel; i++)
+    {
+      float val = elem (i);
+      if (val != 0 && val != 1)
+	return true;
+    }
+
+  return false;
+}
+
+// Return true if every element is exactly zero (either sign of zero
+// passes, since -0.0 == 0).
+bool
+FloatNDArray::all_elements_are_zero (void) const
+{
+  octave_idx_type nel = nelem ();
+
+  for (octave_idx_type i = 0; i < nel; i++)
+    if (elem (i) != 0)
+      return false;
+
+  return true;
+}
+
+// Return true if every element is an integer value, Inf or NaN.
+// NOTE(review): D_NINT is the double-precision rounder; a float
+// promotes to double exactly, so the equality test is still exact,
+// and Inf satisfies it since D_NINT(Inf) == Inf.
+bool
+FloatNDArray::all_elements_are_int_or_inf_or_nan (void) const
+{
+  octave_idx_type nel = nelem ();
+
+  for (octave_idx_type i = 0; i < nel; i++)
+    {
+      float val = elem (i);
+      if (xisnan (val) || D_NINT (val) == val)
+	continue;
+      else
+	return false;
+    }
+
+  return true;
+}
+
+// Return nonzero if any element of M is not an integer.  Also extract
+// the largest and smallest values and return them in MAX_VAL and MIN_VAL.
+ +bool +FloatNDArray::all_integers (float& max_val, float& min_val) const +{ + octave_idx_type nel = nelem (); + + if (nel > 0) + { + max_val = elem (0); + min_val = elem (0); + } + else + return false; + + for (octave_idx_type i = 0; i < nel; i++) + { + float val = elem (i); + + if (val > max_val) + max_val = val; + + if (val < min_val) + min_val = val; + + if (D_NINT (val) != val) + return false; + } + + return true; +} + +bool +FloatNDArray::too_large_for_float (void) const +{ + octave_idx_type nel = nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + { + float val = elem (i); + + if (! (xisnan (val) || xisinf (val)) + && fabs (val) > FLT_MAX) + return true; + } + + return false; +} + +// FIXME -- this is not quite the right thing. + +boolNDArray +FloatNDArray::all (int dim) const +{ + MX_ND_ANY_ALL_REDUCTION (MX_ND_ALL_EVAL (MX_ND_ALL_EXPR), true); +} + +boolNDArray +FloatNDArray::any (int dim) const +{ + MX_ND_ANY_ALL_REDUCTION + (MX_ND_ANY_EVAL (elem (iter_idx) != 0 + && ! lo_ieee_isnan (elem (iter_idx))), false); +} + +FloatNDArray +FloatNDArray::cumprod (int dim) const +{ + MX_ND_CUMULATIVE_OP (FloatNDArray, float, 1, *); +} + +FloatNDArray +FloatNDArray::cumsum (int dim) const +{ + MX_ND_CUMULATIVE_OP (FloatNDArray, float, 0, +); +} + +FloatNDArray +FloatNDArray::prod (int dim) const +{ + MX_ND_REDUCTION (retval(result_idx) *= elem (iter_idx), 1, FloatNDArray); +} + +FloatNDArray +FloatNDArray::sumsq (int dim) const +{ + MX_ND_REDUCTION (retval(result_idx) += std::pow (elem (iter_idx), 2), 0, FloatNDArray); +} + +FloatNDArray +FloatNDArray::sum (int dim) const +{ + MX_ND_REDUCTION (retval(result_idx) += elem (iter_idx), 0, FloatNDArray); +} + +FloatNDArray +FloatNDArray::max (int dim) const +{ + ArrayN dummy_idx; + return max (dummy_idx, dim); +} + +FloatNDArray +FloatNDArray::max (ArrayN& idx_arg, int dim) const +{ + dim_vector dv = dims (); + dim_vector dr = dims (); + + if (dv.numel () == 0 || dim > dv.length () || dim < 0) + return FloatNDArray 
(); + + dr(dim) = 1; + + FloatNDArray result (dr); + idx_arg.resize (dr); + + octave_idx_type x_stride = 1; + octave_idx_type x_len = dv(dim); + for (int i = 0; i < dim; i++) + x_stride *= dv(i); + + for (octave_idx_type i = 0; i < dr.numel (); i++) + { + octave_idx_type x_offset; + if (x_stride == 1) + x_offset = i * x_len; + else + { + octave_idx_type x_offset2 = 0; + x_offset = i; + while (x_offset >= x_stride) + { + x_offset -= x_stride; + x_offset2++; + } + x_offset += x_offset2 * x_stride * x_len; + } + + octave_idx_type idx_j; + + float tmp_max = octave_Float_NaN; + + for (idx_j = 0; idx_j < x_len; idx_j++) + { + tmp_max = elem (idx_j * x_stride + x_offset); + + if (! xisnan (tmp_max)) + break; + } + + for (octave_idx_type j = idx_j+1; j < x_len; j++) + { + float tmp = elem (j * x_stride + x_offset); + + if (xisnan (tmp)) + continue; + else if (tmp > tmp_max) + { + idx_j = j; + tmp_max = tmp; + } + } + + result.elem (i) = tmp_max; + idx_arg.elem (i) = xisnan (tmp_max) ? 0 : idx_j; + } + + result.chop_trailing_singletons (); + idx_arg.chop_trailing_singletons (); + + return result; +} + +FloatNDArray +FloatNDArray::min (int dim) const +{ + ArrayN dummy_idx; + return min (dummy_idx, dim); +} + +FloatNDArray +FloatNDArray::min (ArrayN& idx_arg, int dim) const +{ + dim_vector dv = dims (); + dim_vector dr = dims (); + + if (dv.numel () == 0 || dim > dv.length () || dim < 0) + return FloatNDArray (); + + dr(dim) = 1; + + FloatNDArray result (dr); + idx_arg.resize (dr); + + octave_idx_type x_stride = 1; + octave_idx_type x_len = dv(dim); + for (int i = 0; i < dim; i++) + x_stride *= dv(i); + + for (octave_idx_type i = 0; i < dr.numel (); i++) + { + octave_idx_type x_offset; + if (x_stride == 1) + x_offset = i * x_len; + else + { + octave_idx_type x_offset2 = 0; + x_offset = i; + while (x_offset >= x_stride) + { + x_offset -= x_stride; + x_offset2++; + } + x_offset += x_offset2 * x_stride * x_len; + } + + octave_idx_type idx_j; + + float tmp_min = octave_Float_NaN; 
+ + for (idx_j = 0; idx_j < x_len; idx_j++) + { + tmp_min = elem (idx_j * x_stride + x_offset); + + if (! xisnan (tmp_min)) + break; + } + + for (octave_idx_type j = idx_j+1; j < x_len; j++) + { + float tmp = elem (j * x_stride + x_offset); + + if (xisnan (tmp)) + continue; + else if (tmp < tmp_min) + { + idx_j = j; + tmp_min = tmp; + } + } + + result.elem (i) = tmp_min; + idx_arg.elem (i) = xisnan (tmp_min) ? 0 : idx_j; + } + + result.chop_trailing_singletons (); + idx_arg.chop_trailing_singletons (); + + return result; +} + +FloatNDArray +FloatNDArray::concat (const FloatNDArray& rb, const Array& ra_idx) +{ + if (rb.numel () > 0) + insert (rb, ra_idx); + return *this; +} + +FloatComplexNDArray +FloatNDArray::concat (const FloatComplexNDArray& rb, const Array& ra_idx) +{ + FloatComplexNDArray retval (*this); + if (rb.numel () > 0) + retval.insert (rb, ra_idx); + return retval; +} + +charNDArray +FloatNDArray::concat (const charNDArray& rb, const Array& ra_idx) +{ + charNDArray retval (dims ()); + octave_idx_type nel = numel (); + + for (octave_idx_type i = 0; i < nel; i++) + { + float d = elem (i); + + if (xisnan (d)) + { + (*current_liboctave_error_handler) + ("invalid conversion from NaN to character"); + return retval; + } + else + { + octave_idx_type ival = NINTbig (d); + + if (ival < 0 || ival > UCHAR_MAX) + // FIXME -- is there something + // better we could do? Should we warn the user? 
+ ival = 0; + + retval.elem (i) = static_cast(ival); + } + } + + if (rb.numel () == 0) + return retval; + + retval.insert (rb, ra_idx); + return retval; +} + +FloatNDArray +real (const FloatComplexNDArray& a) +{ + octave_idx_type a_len = a.length (); + FloatNDArray retval; + if (a_len > 0) + retval = FloatNDArray (mx_inline_real_dup (a.data (), a_len), a.dims ()); + return retval; +} + +FloatNDArray +imag (const FloatComplexNDArray& a) +{ + octave_idx_type a_len = a.length (); + FloatNDArray retval; + if (a_len > 0) + retval = FloatNDArray (mx_inline_imag_dup (a.data (), a_len), a.dims ()); + return retval; +} + +FloatNDArray& +FloatNDArray::insert (const FloatNDArray& a, octave_idx_type r, octave_idx_type c) +{ + Array::insert (a, r, c); + return *this; +} + +FloatNDArray& +FloatNDArray::insert (const FloatNDArray& a, const Array& ra_idx) +{ + Array::insert (a, ra_idx); + return *this; +} + +FloatNDArray +FloatNDArray::abs (void) const +{ + FloatNDArray retval (dims ()); + + octave_idx_type nel = nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + retval(i) = fabs (elem (i)); + + return retval; +} + +Matrix +FloatNDArray::matrix_value (void) const +{ + Matrix retval; + + int nd = ndims (); + + switch (nd) + { + case 1: + retval = Matrix (Array2 (*this, dimensions(0), 1)); + break; + + case 2: + retval = Matrix (Array2 (*this, dimensions(0), dimensions(1))); + break; + + default: + (*current_liboctave_error_handler) + ("invalid conversion of FloatNDArray to Matrix"); + break; + } + + return retval; +} + +void +FloatNDArray::increment_index (Array& ra_idx, + const dim_vector& dimensions, + int start_dimension) +{ + ::increment_index (ra_idx, dimensions, start_dimension); +} + +octave_idx_type +FloatNDArray::compute_index (Array& ra_idx, + const dim_vector& dimensions) +{ + return ::compute_index (ra_idx, dimensions); +} + +FloatNDArray +FloatNDArray::diag (octave_idx_type k) const +{ + return MArrayN::diag (k); +} + +FloatNDArray +FloatNDArray::map (dmapper 
fcn) const +{ + return MArrayN::map (func_ptr (fcn)); +} + +FloatComplexNDArray +FloatNDArray::map (cmapper fcn) const +{ + return MArrayN::map (func_ptr (fcn)); +} + +boolNDArray +FloatNDArray::map (bmapper fcn) const +{ + return MArrayN::map (func_ptr (fcn)); +} + +// This contains no information on the array structure !!! +std::ostream& +operator << (std::ostream& os, const FloatNDArray& a) +{ + octave_idx_type nel = a.nelem (); + + for (octave_idx_type i = 0; i < nel; i++) + { + os << " "; + octave_write_float (os, a.elem (i)); + os << "\n"; + } + return os; +} + +std::istream& +operator >> (std::istream& is, FloatNDArray& a) +{ + octave_idx_type nel = a.nelem (); + + if (nel < 1 ) + is.clear (std::ios::badbit); + else + { + float tmp; + for (octave_idx_type i = 0; i < nel; i++) + { + tmp = octave_read_float (is); + if (is) + a.elem (i) = tmp; + else + goto done; + } + } + + done: + + return is; +} + +// FIXME -- it would be nice to share code among the min/max +// functions below. + +#define EMPTY_RETURN_CHECK(T) \ + if (nel == 0) \ + return T (dv); + +FloatNDArray +min (float d, const FloatNDArray& m) +{ + dim_vector dv = m.dims (); + octave_idx_type nel = dv.numel (); + + EMPTY_RETURN_CHECK (FloatNDArray); + + FloatNDArray result (dv); + + for (octave_idx_type i = 0; i < nel; i++) + { + OCTAVE_QUIT; + result (i) = xmin (d, m (i)); + } + + return result; +} + +FloatNDArray +min (const FloatNDArray& m, float d) +{ + dim_vector dv = m.dims (); + octave_idx_type nel = dv.numel (); + + EMPTY_RETURN_CHECK (FloatNDArray); + + FloatNDArray result (dv); + + for (octave_idx_type i = 0; i < nel; i++) + { + OCTAVE_QUIT; + result (i) = xmin (d, m (i)); + } + + return result; +} + +FloatNDArray +min (const FloatNDArray& a, const FloatNDArray& b) +{ + dim_vector dv = a.dims (); + octave_idx_type nel = dv.numel (); + + if (dv != b.dims ()) + { + (*current_liboctave_error_handler) + ("two-arg min expecting args of same size"); + return FloatNDArray (); + } + + 
EMPTY_RETURN_CHECK (FloatNDArray); + + FloatNDArray result (dv); + + for (octave_idx_type i = 0; i < nel; i++) + { + OCTAVE_QUIT; + result (i) = xmin (a (i), b (i)); + } + + return result; +} + +FloatNDArray +max (float d, const FloatNDArray& m) +{ + dim_vector dv = m.dims (); + octave_idx_type nel = dv.numel (); + + EMPTY_RETURN_CHECK (FloatNDArray); + + FloatNDArray result (dv); + + for (octave_idx_type i = 0; i < nel; i++) + { + OCTAVE_QUIT; + result (i) = xmax (d, m (i)); + } + + return result; +} + +FloatNDArray +max (const FloatNDArray& m, float d) +{ + dim_vector dv = m.dims (); + octave_idx_type nel = dv.numel (); + + EMPTY_RETURN_CHECK (FloatNDArray); + + FloatNDArray result (dv); + + for (octave_idx_type i = 0; i < nel; i++) + { + OCTAVE_QUIT; + result (i) = xmax (d, m (i)); + } + + return result; +} + +FloatNDArray +max (const FloatNDArray& a, const FloatNDArray& b) +{ + dim_vector dv = a.dims (); + octave_idx_type nel = dv.numel (); + + if (dv != b.dims ()) + { + (*current_liboctave_error_handler) + ("two-arg max expecting args of same size"); + return FloatNDArray (); + } + + EMPTY_RETURN_CHECK (FloatNDArray); + + FloatNDArray result (dv); + + for (octave_idx_type i = 0; i < nel; i++) + { + OCTAVE_QUIT; + result (i) = xmax (a (i), b (i)); + } + + return result; +} + +NDS_CMP_OPS(FloatNDArray, , float, ) +NDS_BOOL_OPS(FloatNDArray, float, static_cast (0.0)) + +SND_CMP_OPS(float, , FloatNDArray, ) +SND_BOOL_OPS(float, FloatNDArray, static_cast (0.0)) + +NDND_CMP_OPS(FloatNDArray, , FloatNDArray, ) +NDND_BOOL_OPS(FloatNDArray, FloatNDArray, static_cast (0.0)) + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fNDArray.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fNDArray.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,176 @@ +/* + +Copyright (C) 1996, 1997, 2003, 2004, 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. 
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<http://www.gnu.org/licenses/>.
+
+*/
+
+#if !defined (octave_FloatNDArray_h)
+#define octave_FloatNDArray_h 1
+
+#include "MArrayN.h"
+#include "fMatrix.h"
+#include "intNDArray.h"
+
+#include "mx-defs.h"
+#include "mx-op-defs.h"
+
+class
+OCTAVE_API
+FloatNDArray : public MArrayN<float>
+{
+public:
+
+  FloatNDArray (void) : MArrayN<float> () { }
+
+  FloatNDArray (const dim_vector& dv) : MArrayN<float> (dv) { }
+
+  FloatNDArray (const dim_vector& dv, float val)
+    : MArrayN<float> (dv, val) { }
+
+  FloatNDArray (const FloatNDArray& a) : MArrayN<float> (a) { }
+
+  FloatNDArray (const FloatMatrix& a) : MArrayN<float> (a) { }
+
+  template <class U>
+  FloatNDArray (const MArrayN<U>& a) : MArrayN<float> (a) { }
+
+  template <class U>
+  FloatNDArray (const ArrayN<U>& a) : MArrayN<float> (a) { }
+
+  template <class U>
+  explicit FloatNDArray (const intNDArray<U>& a) : MArrayN<float> (a) { }
+
+  FloatNDArray& operator = (const FloatNDArray& a)
+    {
+      MArrayN<float>::operator = (a);
+      return *this;
+    }
+
+  // unary operations
+
+  boolNDArray operator ! (void) const;
+
+  bool any_element_is_negative (bool = false) const;
+  bool any_element_is_inf_or_nan (void) const;
+  bool any_element_not_one_or_zero (void) const;
+  bool all_elements_are_zero (void) const;
+  bool all_elements_are_int_or_inf_or_nan (void) const;
+  bool all_integers (float& max_val, float& min_val) const;
+  bool too_large_for_float (void) const;
+
+  // FIXME -- this is not quite the right thing.
+ + boolNDArray all (int dim = -1) const; + boolNDArray any (int dim = -1) const; + + FloatNDArray cumprod (int dim = -1) const; + FloatNDArray cumsum (int dim = -1) const; + FloatNDArray prod (int dim = -1) const; + FloatNDArray sum (int dim = -1) const; + FloatNDArray sumsq (int dim = -1) const; + FloatNDArray concat (const FloatNDArray& rb, const Array& ra_idx); + FloatComplexNDArray concat (const FloatComplexNDArray& rb, const Array& ra_idx); + charNDArray concat (const charNDArray& rb, const Array& ra_idx); + + FloatNDArray max (int dim = 0) const; + FloatNDArray max (ArrayN& index, int dim = 0) const; + FloatNDArray min (int dim = 0) const; + FloatNDArray min (ArrayN& index, int dim = 0) const; + + FloatNDArray& insert (const FloatNDArray& a, octave_idx_type r, octave_idx_type c); + FloatNDArray& insert (const FloatNDArray& a, const Array& ra_idx); + + FloatNDArray abs (void) const; + + FloatComplexNDArray fourier (int dim = 1) const; + FloatComplexNDArray ifourier (int dim = 1) const; + + FloatComplexNDArray fourier2d (void) const; + FloatComplexNDArray ifourier2d (void) const; + + FloatComplexNDArray fourierNd (void) const; + FloatComplexNDArray ifourierNd (void) const; + + friend OCTAVE_API FloatNDArray real (const FloatComplexNDArray& a); + friend OCTAVE_API FloatNDArray imag (const FloatComplexNDArray& a); + + Matrix matrix_value (void) const; + + FloatNDArray squeeze (void) const { return MArrayN::squeeze (); } + + static void increment_index (Array& ra_idx, + const dim_vector& dimensions, + int start_dimension = 0); + + static octave_idx_type compute_index (Array& ra_idx, + const dim_vector& dimensions); + + // i/o + + friend OCTAVE_API std::ostream& operator << (std::ostream& os, const FloatNDArray& a); + friend OCTAVE_API std::istream& operator >> (std::istream& is, FloatNDArray& a); + + static float resize_fill_value (void) { return 0; } + + FloatNDArray diag (octave_idx_type k = 0) const; + + typedef float (*dmapper) (float); + typedef FloatComplex 
(*cmapper) (const FloatComplex&); + typedef bool (*bmapper) (float); + + FloatNDArray map (dmapper fcn) const; + FloatComplexNDArray map (cmapper fcn) const; + boolNDArray map (bmapper fcn) const; + +private: + + FloatNDArray (float *d, const dim_vector& dv) : MArrayN (d, dv) { } +}; + +// Publish externally used friend functions. + +extern OCTAVE_API FloatNDArray real (const FloatComplexNDArray& a); +extern OCTAVE_API FloatNDArray imag (const FloatComplexNDArray& a); + +extern OCTAVE_API FloatNDArray min (float d, const FloatNDArray& m); +extern OCTAVE_API FloatNDArray min (const FloatNDArray& m, float d); +extern OCTAVE_API FloatNDArray min (const FloatNDArray& a, const FloatNDArray& b); + +extern OCTAVE_API FloatNDArray max (float d, const FloatNDArray& m); +extern OCTAVE_API FloatNDArray max (const FloatNDArray& m, float d); +extern OCTAVE_API FloatNDArray max (const FloatNDArray& a, const FloatNDArray& b); + +NDS_CMP_OP_DECLS (FloatNDArray, float, OCTAVE_API) +NDS_BOOL_OP_DECLS (FloatNDArray, float, OCTAVE_API) + +SND_CMP_OP_DECLS (float, FloatNDArray, OCTAVE_API) +SND_BOOL_OP_DECLS (float, FloatNDArray, OCTAVE_API) + +NDND_CMP_OP_DECLS (FloatNDArray, FloatNDArray, OCTAVE_API) +NDND_BOOL_OP_DECLS (FloatNDArray, FloatNDArray, OCTAVE_API) + +MARRAY_FORWARD_DEFS (MArrayN, FloatNDArray, float) + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fRowVector.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fRowVector.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,366 @@ +// RowVector manipulations. +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. 
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<http://www.gnu.org/licenses/>.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <iostream>
+
+#include "Array-util.h"
+#include "f77-fcn.h"
+#include "functor.h"
+#include "lo-error.h"
+#include "mx-base.h"
+#include "mx-inlines.cc"
+#include "oct-cmplx.h"
+
+// Fortran functions we call.
+
+extern "C"
+{
+  F77_RET_T
+  F77_FUNC (sgemv, SGEMV) (F77_CONST_CHAR_ARG_DECL,
+                           const octave_idx_type&, const octave_idx_type&, const float&,
+                           const float*, const octave_idx_type&, const float*,
+                           const octave_idx_type&, const float&, float*, const octave_idx_type&
+                           F77_CHAR_ARG_LEN_DECL);
+
+  F77_RET_T
+  F77_FUNC (xsdot, XSDOT) (const octave_idx_type&, const float*, const octave_idx_type&,
+                           const float*, const octave_idx_type&, float&);
+}
+
+// Row Vector class.
+ +bool +FloatRowVector::operator == (const FloatRowVector& a) const +{ + octave_idx_type len = length (); + if (len != a.length ()) + return 0; + return mx_inline_equal (data (), a.data (), len); +} + +bool +FloatRowVector::operator != (const FloatRowVector& a) const +{ + return !(*this == a); +} + +FloatRowVector& +FloatRowVector::insert (const FloatRowVector& a, octave_idx_type c) +{ + octave_idx_type a_len = a.length (); + + if (c < 0 || c + a_len > length ()) + { + (*current_liboctave_error_handler) ("range error for insert"); + return *this; + } + + if (a_len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < a_len; i++) + xelem (c+i) = a.elem (i); + } + + return *this; +} + +FloatRowVector& +FloatRowVector::fill (float val) +{ + octave_idx_type len = length (); + + if (len > 0) + { + make_unique (); + + for (octave_idx_type i = 0; i < len; i++) + xelem (i) = val; + } + + return *this; +} + +FloatRowVector& +FloatRowVector::fill (float val, octave_idx_type c1, octave_idx_type c2) +{ + octave_idx_type len = length (); + + if (c1 < 0 || c2 < 0 || c1 >= len || c2 >= len) + { + (*current_liboctave_error_handler) ("range error for fill"); + return *this; + } + + if (c1 > c2) { octave_idx_type tmp = c1; c1 = c2; c2 = tmp; } + + if (c2 >= c1) + { + make_unique (); + + for (octave_idx_type i = c1; i <= c2; i++) + xelem (i) = val; + } + + return *this; +} + +FloatRowVector +FloatRowVector::append (const FloatRowVector& a) const +{ + octave_idx_type len = length (); + octave_idx_type nc_insert = len; + FloatRowVector retval (len + a.length ()); + retval.insert (*this, 0); + retval.insert (a, nc_insert); + return retval; +} + +FloatColumnVector +FloatRowVector::transpose (void) const +{ + return MArray::transpose(); +} + +FloatRowVector +real (const FloatComplexRowVector& a) +{ + octave_idx_type a_len = a.length (); + FloatRowVector retval; + if (a_len > 0) + retval = FloatRowVector (mx_inline_real_dup (a.data (), a_len), a_len); + return retval; +} + 
+FloatRowVector +imag (const FloatComplexRowVector& a) +{ + octave_idx_type a_len = a.length (); + FloatRowVector retval; + if (a_len > 0) + retval = FloatRowVector (mx_inline_imag_dup (a.data (), a_len), a_len); + return retval; +} + +FloatRowVector +FloatRowVector::extract (octave_idx_type c1, octave_idx_type c2) const +{ + if (c1 > c2) { octave_idx_type tmp = c1; c1 = c2; c2 = tmp; } + + octave_idx_type new_c = c2 - c1 + 1; + + FloatRowVector result (new_c); + + for (octave_idx_type i = 0; i < new_c; i++) + result.xelem (i) = elem (c1+i); + + return result; +} + +FloatRowVector +FloatRowVector::extract_n (octave_idx_type r1, octave_idx_type n) const +{ + FloatRowVector result (n); + + for (octave_idx_type i = 0; i < n; i++) + result.xelem (i) = elem (r1+i); + + return result; +} + +// row vector by matrix -> row vector + +FloatRowVector +operator * (const FloatRowVector& v, const FloatMatrix& a) +{ + FloatRowVector retval; + + octave_idx_type len = v.length (); + + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (a_nr != len) + gripe_nonconformant ("operator *", 1, len, a_nr, a_nc); + else + { + if (len == 0) + retval.resize (a_nc, 0.0); + else + { + // Transpose A to form A'*x == (x'*A)' + + octave_idx_type ld = a_nr; + + retval.resize (a_nc); + float *y = retval.fortran_vec (); + + F77_XFCN (sgemv, SGEMV, (F77_CONST_CHAR_ARG2 ("T", 1), + a_nr, a_nc, 1.0, a.data (), + ld, v.data (), 1, 0.0, y, 1 + F77_CHAR_ARG_LEN (1))); + } + } + + return retval; +} + +// other operations + +FloatRowVector +FloatRowVector::map (dmapper fcn) const +{ + return MArray::map (func_ptr (fcn)); +} + +FloatComplexRowVector +FloatRowVector::map (cmapper fcn) const +{ + return MArray::map (func_ptr (fcn)); +} + +float +FloatRowVector::min (void) const +{ + octave_idx_type len = length (); + if (len == 0) + return 0; + + float res = elem (0); + + for (octave_idx_type i = 1; i < len; i++) + if (elem (i) < res) + res = elem (i); + + return res; +} + +float 
+FloatRowVector::max (void) const +{ + octave_idx_type len = length (); + if (len == 0) + return 0; + + float res = elem (0); + + for (octave_idx_type i = 1; i < len; i++) + if (elem (i) > res) + res = elem (i); + + return res; +} + +std::ostream& +operator << (std::ostream& os, const FloatRowVector& a) +{ +// int field_width = os.precision () + 7; + + for (octave_idx_type i = 0; i < a.length (); i++) + os << " " /* setw (field_width) */ << a.elem (i); + return os; +} + +std::istream& +operator >> (std::istream& is, FloatRowVector& a) +{ + octave_idx_type len = a.length(); + + if (len < 1) + is.clear (std::ios::badbit); + else + { + float tmp; + for (octave_idx_type i = 0; i < len; i++) + { + is >> tmp; + if (is) + a.elem (i) = tmp; + else + break; + } + } + return is; +} + +// other operations + +FloatRowVector +linspace (float x1, float x2, octave_idx_type n) +{ + FloatRowVector retval; + + if (n > 1) + { + retval.resize (n); + float delta = (x2 - x1) / (n - 1); + retval.elem (0) = x1; + for (octave_idx_type i = 1; i < n-1; i++) + retval.elem (i) = x1 + i * delta; + retval.elem (n-1) = x2; + } + else + { + retval.resize (1); + retval.elem (0) = x2; + } + + return retval; +} + +// row vector by column vector -> scalar + +float +operator * (const FloatRowVector& v, const FloatColumnVector& a) +{ + float retval = 0.0; + + octave_idx_type len = v.length (); + + octave_idx_type a_len = a.length (); + + if (len != a_len) + gripe_nonconformant ("operator *", len, a_len); + else if (len != 0) + F77_FUNC (xsdot, XSDOT) (len, v.data (), 1, a.data (), 1, retval); + + return retval; +} + +FloatComplex +operator * (const FloatRowVector& v, const FloatComplexColumnVector& a) +{ + FloatComplexRowVector tmp (v); + return tmp * a; +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/fRowVector.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/fRowVector.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,119 
@@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2003, 2004, 2005, + 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_FloatRowVector_h) +#define octave_FloatRowVector_h 1 + +#include "MArray.h" + +#include "mx-defs.h" + +class +OCTAVE_API +FloatRowVector : public MArray +{ +public: + + FloatRowVector (void) : MArray () { } + + explicit FloatRowVector (octave_idx_type n) : MArray (n) { } + + FloatRowVector (octave_idx_type n, float val) : MArray (n, val) { } + + FloatRowVector (const FloatRowVector& a) : MArray (a) { } + + FloatRowVector (const MArray& a) : MArray (a) { } + + FloatRowVector& operator = (const FloatRowVector& a) + { + MArray::operator = (a); + return *this; + } + + bool operator == (const FloatRowVector& a) const; + bool operator != (const FloatRowVector& a) const; + + // destructive insert/delete/reorder operations + + FloatRowVector& insert (const FloatRowVector& a, octave_idx_type c); + + FloatRowVector& fill (float val); + FloatRowVector& fill (float val, octave_idx_type c1, octave_idx_type c2); + + FloatRowVector append (const FloatRowVector& a) const; + + FloatColumnVector transpose (void) const; + + friend OCTAVE_API FloatRowVector real (const FloatComplexRowVector& a); + friend OCTAVE_API FloatRowVector imag (const FloatComplexRowVector& a); + + // resize is the destructive equivalent for this one + + 
FloatRowVector extract (octave_idx_type c1, octave_idx_type c2) const; + + FloatRowVector extract_n (octave_idx_type c1, octave_idx_type n) const; + + // row vector by matrix -> row vector + + friend OCTAVE_API FloatRowVector operator * (const FloatRowVector& a, const FloatMatrix& b); + + // other operations + + typedef float (*dmapper) (float); + typedef FloatComplex (*cmapper) (const FloatComplex&); + + FloatRowVector map (dmapper fcn) const; + FloatComplexRowVector map (cmapper fcn) const; + + float min (void) const; + float max (void) const; + + // i/o + + friend OCTAVE_API std::ostream& operator << (std::ostream& os, const FloatRowVector& a); + friend OCTAVE_API std::istream& operator >> (std::istream& is, FloatRowVector& a); + +private: + + FloatRowVector (float *d, octave_idx_type l) : MArray (d, l) { } +}; + +// row vector by column vector -> scalar + +float OCTAVE_API operator * (const FloatRowVector& a, const FloatColumnVector& b); + +Complex OCTAVE_API operator * (const FloatRowVector& a, const ComplexColumnVector& b); + +// other operations + +OCTAVE_API FloatRowVector linspace (float x1, float x2, octave_idx_type n); + +MARRAY_FORWARD_DEFS (MArray, FloatRowVector, float) + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/floatCHOL.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/floatCHOL.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,291 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2002, 2003, 2004, 2005, 2007 + John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. 
+ +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +// updating/downdating by Jaroslav Hajek 2008 + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "fRowVector.h" +#include "floatCHOL.h" +#include "f77-fcn.h" +#include "lo-error.h" + +extern "C" +{ + F77_RET_T + F77_FUNC (spotrf, SPOTRF) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + float*, const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (spotri, SPOTRI) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + float*, const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL); + + F77_RET_T + F77_FUNC (spocon, SPOCON) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, + float*, const octave_idx_type&, const float&, + float&, float*, octave_idx_type*, + octave_idx_type& F77_CHAR_ARG_LEN_DECL); + F77_RET_T + F77_FUNC (sch1up, SCH1UP) (const octave_idx_type&, float*, float*, float*); + + F77_RET_T + F77_FUNC (sch1dn, SCH1DN) (const octave_idx_type&, float*, float*, float*, + octave_idx_type&); + + F77_RET_T + F77_FUNC (sqrshc, SQRSHC) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, + float*, float*, const octave_idx_type&, const octave_idx_type&); + + F77_RET_T + F77_FUNC (schinx, SCHINX) (const octave_idx_type&, const float*, float*, const octave_idx_type&, + const float*, octave_idx_type&); + + F77_RET_T + F77_FUNC (schdex, SCHDEX) (const octave_idx_type&, const float*, float*, const octave_idx_type&); +} + +octave_idx_type +FloatCHOL::init (const FloatMatrix& a, bool calc_cond) +{ + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (a_nr != a_nc) + { + (*current_liboctave_error_handler) 
("FloatCHOL requires square matrix"); + return -1; + } + + octave_idx_type n = a_nc; + octave_idx_type info; + + chol_mat = a; + float *h = chol_mat.fortran_vec (); + + // Calculate the norm of the matrix, for later use. + float anorm = 0; + if (calc_cond) + anorm = chol_mat.abs().sum().row(static_cast(0)).max(); + + F77_XFCN (spotrf, SPOTRF, (F77_CONST_CHAR_ARG2 ("U", 1), + n, h, n, info + F77_CHAR_ARG_LEN (1))); + + xrcond = 0.0; + if (info != 0) + info = -1; + else if (calc_cond) + { + octave_idx_type spocon_info = 0; + + // Now calculate the condition number for non-singular matrix. + Array z (3*n); + float *pz = z.fortran_vec (); + Array iz (n); + octave_idx_type *piz = iz.fortran_vec (); + F77_XFCN (spocon, SPOCON, (F77_CONST_CHAR_ARG2 ("U", 1), n, h, + n, anorm, xrcond, pz, piz, spocon_info + F77_CHAR_ARG_LEN (1))); + + if (spocon_info != 0) + info = -1; + } + else + { + // If someone thinks of a more graceful way of doing this (or + // faster for that matter :-)), please let me know! + + if (n > 1) + for (octave_idx_type j = 0; j < a_nc; j++) + for (octave_idx_type i = j+1; i < a_nr; i++) + chol_mat.xelem (i, j) = 0.0; + } + + return info; +} + +static FloatMatrix +chol2inv_internal (const FloatMatrix& r) +{ + FloatMatrix retval; + + octave_idx_type r_nr = r.rows (); + octave_idx_type r_nc = r.cols (); + + if (r_nr == r_nc) + { + octave_idx_type n = r_nc; + octave_idx_type info = 0; + + FloatMatrix tmp = r; + float *v = tmp.fortran_vec(); + + if (info == 0) + { + F77_XFCN (spotri, SPOTRI, (F77_CONST_CHAR_ARG2 ("U", 1), n, + v, n, info + F77_CHAR_ARG_LEN (1))); + + // If someone thinks of a more graceful way of doing this (or + // faster for that matter :-)), please let me know! 
+ + if (n > 1) + for (octave_idx_type j = 0; j < r_nc; j++) + for (octave_idx_type i = j+1; i < r_nr; i++) + tmp.xelem (i, j) = tmp.xelem (j, i); + + retval = tmp; + } + } + else + (*current_liboctave_error_handler) ("chol2inv requires square matrix"); + + return retval; +} + +// Compute the inverse of a matrix using the Cholesky factorization. +FloatMatrix +FloatCHOL::inverse (void) const +{ + return chol2inv_internal (chol_mat); +} + +void +FloatCHOL::set (const FloatMatrix& R) +{ + if (R.is_square ()) + chol_mat = R; + else + (*current_liboctave_error_handler) ("FloatCHOL requires square matrix"); +} + +void +FloatCHOL::update (const FloatMatrix& u) +{ + octave_idx_type n = chol_mat.rows (); + + if (u.length () == n) + { + FloatMatrix tmp = u; + + OCTAVE_LOCAL_BUFFER (float, w, n); + + F77_XFCN (sch1up, SCH1UP, (n, chol_mat.fortran_vec (), + tmp.fortran_vec (), w)); + } + else + (*current_liboctave_error_handler) ("FloatCHOL update dimension mismatch"); +} + +octave_idx_type +FloatCHOL::downdate (const FloatMatrix& u) +{ + octave_idx_type info = -1; + + octave_idx_type n = chol_mat.rows (); + + if (u.length () == n) + { + FloatMatrix tmp = u; + + OCTAVE_LOCAL_BUFFER (float, w, n); + + F77_XFCN (sch1dn, SCH1DN, (n, chol_mat.fortran_vec (), + tmp.fortran_vec (), w, info)); + } + else + (*current_liboctave_error_handler) ("FloatCHOL downdate dimension mismatch"); + + return info; +} + +octave_idx_type +FloatCHOL::insert_sym (const FloatMatrix& u, octave_idx_type j) +{ + octave_idx_type info = -1; + + octave_idx_type n = chol_mat.rows (); + + if (u.length () != n+1) + (*current_liboctave_error_handler) ("FloatCHOL insert dimension mismatch"); + else if (j < 0 || j > n) + (*current_liboctave_error_handler) ("FloatCHOL insert index out of range"); + else + { + FloatMatrix chol_mat1 (n+1, n+1); + + F77_XFCN (schinx, SCHINX, (n, chol_mat.data (), chol_mat1.fortran_vec (), + j+1, u.data (), info)); + + chol_mat = chol_mat1; + } + + return info; +} + +void 
+FloatCHOL::delete_sym (octave_idx_type j) +{ + octave_idx_type n = chol_mat.rows (); + + if (j < 0 || j > n-1) + (*current_liboctave_error_handler) ("FloatCHOL delete index out of range"); + else + { + FloatMatrix chol_mat1 (n-1, n-1); + + F77_XFCN (schdex, SCHDEX, (n, chol_mat.data (), chol_mat1.fortran_vec (), j+1)); + + chol_mat = chol_mat1; + } +} + +void +FloatCHOL::shift_sym (octave_idx_type i, octave_idx_type j) +{ + octave_idx_type n = chol_mat.rows (); + float dummy; + + if (i < 0 || i > n-1 || j < 0 || j > n-1) + (*current_liboctave_error_handler) ("FloatCHOL shift index out of range"); + else + F77_XFCN (sqrshc, SQRSHC, (0, n, n, &dummy, chol_mat.fortran_vec (), i+1, j+1)); +} + +FloatMatrix +chol2inv (const FloatMatrix& r) +{ + return chol2inv_internal (r); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/floatCHOL.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/floatCHOL.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,96 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +// updating/downdating by Jaroslav Hajek 2008 + +#if !defined (octave_FloatCHOL_h) +#define octave_FloatCHOL_h 1 + +#include + +#include "fMatrix.h" + +class +OCTAVE_API +FloatCHOL +{ +public: + + FloatCHOL (void) : chol_mat () { } + + FloatCHOL (const FloatMatrix& a, bool calc_cond = false) { init (a, calc_cond); } + + FloatCHOL (const FloatMatrix& a, octave_idx_type& info, bool calc_cond = false) + { info = init (a, calc_cond); } + + FloatCHOL (const FloatCHOL& a) : chol_mat (a.chol_mat), xrcond (a.xrcond) { } + + FloatCHOL& operator = (const FloatCHOL& a) + { + if (this != &a) + { + chol_mat = a.chol_mat; + xrcond = a.xrcond; + } + return *this; + } + + FloatMatrix chol_matrix (void) const { return chol_mat; } + + float rcond (void) const { return xrcond; } + + // Compute the inverse of a matrix using the Cholesky factorization. + FloatMatrix inverse (void) const; + + void set (const FloatMatrix& R); + + void update (const FloatMatrix& u); + + octave_idx_type downdate (const FloatMatrix& u); + + octave_idx_type insert_sym (const FloatMatrix& u, octave_idx_type j); + + void delete_sym (octave_idx_type j); + + void shift_sym (octave_idx_type i, octave_idx_type j); + + friend OCTAVE_API std::ostream& operator << (std::ostream& os, const FloatCHOL& a); + +private: + + FloatMatrix chol_mat; + + float xrcond; + + octave_idx_type init (const FloatMatrix& a, bool calc_cond); +}; + +FloatMatrix OCTAVE_API chol2inv (const FloatMatrix& r); + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/floatDET.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/floatDET.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,84 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. 
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "floatDET.h" +#include "lo-mappers.h" +#include "lo-math.h" + +bool +FloatDET::value_will_overflow (void) const +{ + return base2 + ? (e2 + 1 > xlog2 (DBL_MAX) ? 1 : 0) + : (e10 + 1 > log10 (DBL_MAX) ? 1 : 0); +} + +bool +FloatDET::value_will_underflow (void) const +{ + return base2 + ? (e2 - 1 < xlog2 (DBL_MIN) ? 1 : 0) + : (e10 - 1 < log10 (DBL_MIN) ? 1 : 0); +} + +void +FloatDET::initialize10 (void) +{ + if (c2 != 0.0) + { + float etmp = e2 / xlog2 (static_cast(10)); + e10 = static_cast (xround (etmp)); + etmp -= e10; + c10 = c2 * pow (10.0, etmp); + } +} + +void +FloatDET::initialize2 (void) +{ + if (c10 != 0.0) + { + float etmp = e10 / log10 (2.0); + e2 = static_cast (xround (etmp)); + etmp -= e2; + c2 = c10 * xexp2 (etmp); + } +} + +float +FloatDET::value (void) const +{ + return base2 ? c2 * xexp2 (static_cast(e2)) : c10 * pow (10.0, e10); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/floatDET.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/floatDET.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,117 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. 
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_FloatDET_h) +#define octave_FloatDET_h 1 + +#include + +// FIXME -- we could use templates here; compare with CmplxFloatDET.h + +class +OCTAVE_API +FloatDET +{ +friend class FloatMatrix; + +public: + + FloatDET (void) : c2 (0), c10 (0), e2 (0), e10 (0), base2 (false) { } + + FloatDET (const FloatDET& a) + : c2 (a.c2), c10 (a.c10), e2 (a.e2), e10 (a.e10), base2 (a.base2) + { } + + FloatDET& operator = (const FloatDET& a) + { + if (this != &a) + { + c2 = a.c2; + e2 = a.e2; + + c10 = a.c10; + e10 = a.e10; + + base2 = a.base2; + } + return *this; + } + + bool value_will_overflow (void) const; + bool value_will_underflow (void) const; + + // These two functions were originally defined in base 10, so we are + // preserving that interface here. + + float coefficient (void) const { return coefficient10 (); } + int exponent (void) const { return exponent10 (); } + + float coefficient10 (void) const { return c10; } + int exponent10 (void) const { return e10; } + + float coefficient2 (void) const { return c2; } + int exponent2 (void) const { return e2; } + + float value (void) const; + + friend std::ostream& operator << (std::ostream& os, const FloatDET& a); + +private: + + // Constructed this way, we assume base 2. 
+ + FloatDET (float c, int e) + : c2 (c), c10 (0), e2 (e), e10 (0), base2 (true) + { + initialize10 (); + } + + // Original interface had only this constructor and it was assumed + // to be base 10, so we are preserving that interface here. + + FloatDET (const float *d) + : c2 (0), c10 (d[0]), e2 (0), e10 (static_cast (d[1])), base2 (false) + { + initialize2 (); + } + + void initialize2 (void); + void initialize10 (void); + + float c2; + float c10; + + int e2; + int e10; + + // TRUE means the original values were provided in base 2. + bool base2; +}; + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/floatLU.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/floatLU.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,71 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1999, 2002, 2003, 2004, 2005, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "floatLU.h" +#include "f77-fcn.h" +#include "lo-error.h" + +// Instantiate the base LU class for the types we need. + +#include +#include + +template class base_lu ; + +// Define the constructor for this particular derivation. 
+ +extern "C" +{ + F77_RET_T + F77_FUNC (sgetrf, SGETRF) (const octave_idx_type&, const octave_idx_type&, float*, + const octave_idx_type&, octave_idx_type*, octave_idx_type&); +} + +FloatLU::FloatLU (const FloatMatrix& a) +{ + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + octave_idx_type mn = (a_nr < a_nc ? a_nr : a_nc); + + ipvt.resize (mn); + octave_idx_type *pipvt = ipvt.fortran_vec (); + + a_fact = a; + float *tmp_data = a_fact.fortran_vec (); + + octave_idx_type info = 0; + + F77_XFCN (sgetrf, SGETRF, (a_nr, a_nc, tmp_data, a_nr, pipvt, info)); + + ipvt -= static_cast (1); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/floatLU.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/floatLU.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,59 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2002, 2004, 2005, 2006, 2007 + John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#if !defined (octave_FloatLU_h) +#define octave_FloatLU_h 1 + +#include "base-lu.h" +#include "fMatrix.h" + +class +OCTAVE_API +FloatLU : public base_lu +{ +public: + + FloatLU (void) : base_lu () { } + + FloatLU (const FloatMatrix& a); + + FloatLU (const FloatLU& a) : base_lu (a) { } + + FloatLU& operator = (const FloatLU& a) + { + if (this != &a) + base_lu :: operator = (a); + + return *this; + } + + ~FloatLU (void) { } +}; + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/floatSCHUR.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/floatSCHUR.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,156 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1999, 2000, 2002, 2003, 2004, + 2005, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "floatSCHUR.h" +#include "f77-fcn.h" +#include "lo-error.h" + +extern "C" +{ + F77_RET_T + F77_FUNC (sgeesx, SGEESX) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + FloatSCHUR::select_function, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, float*, const octave_idx_type&, octave_idx_type&, + float*, float*, float*, const octave_idx_type&, + float&, float&, float*, const octave_idx_type&, + octave_idx_type*, const octave_idx_type&, octave_idx_type*, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); +} + +static octave_idx_type +select_ana (const float& a, const float&) +{ + return (a < 0.0); +} + +static octave_idx_type +select_dig (const float& a, const float& b) +{ + return (hypot (a, b) < 1.0); +} + +octave_idx_type +FloatSCHUR::init (const FloatMatrix& a, const std::string& ord, bool calc_unitary) +{ + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (a_nr != a_nc) + { + (*current_liboctave_error_handler) ("FloatSCHUR requires square matrix"); + return -1; + } + + // Workspace requirements may need to be fixed if any of the + // following change. + + char jobvs; + char sense = 'N'; + char sort = 'N'; + + if (calc_unitary) + jobvs = 'V'; + else + jobvs = 'N'; + + char ord_char = ord.empty () ? 
'U' : ord[0]; + + if (ord_char == 'A' || ord_char == 'D' || ord_char == 'a' || ord_char == 'd') + sort = 'S'; + + if (ord_char == 'A' || ord_char == 'a') + selector = select_ana; + else if (ord_char == 'D' || ord_char == 'd') + selector = select_dig; + else + selector = 0; + + octave_idx_type n = a_nc; + octave_idx_type lwork = 8 * n; + octave_idx_type liwork = 1; + octave_idx_type info; + octave_idx_type sdim; + float rconde; + float rcondv; + + schur_mat = a; + + if (calc_unitary) + unitary_mat.resize (n, n); + + float *s = schur_mat.fortran_vec (); + float *q = unitary_mat.fortran_vec (); + + Array wr (n); + float *pwr = wr.fortran_vec (); + + Array wi (n); + float *pwi = wi.fortran_vec (); + + Array work (lwork); + float *pwork = work.fortran_vec (); + + // BWORK is not referenced for the non-ordered Schur routine. + Array bwork ((ord_char == 'N' || ord_char == 'n') ? 0 : n); + octave_idx_type *pbwork = bwork.fortran_vec (); + + Array iwork (liwork); + octave_idx_type *piwork = iwork.fortran_vec (); + + F77_XFCN (sgeesx, SGEESX, (F77_CONST_CHAR_ARG2 (&jobvs, 1), + F77_CONST_CHAR_ARG2 (&sort, 1), + selector, + F77_CONST_CHAR_ARG2 (&sense, 1), + n, s, n, sdim, pwr, pwi, q, n, rconde, rcondv, + pwork, lwork, piwork, liwork, pbwork, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + return info; +} + +std::ostream& +operator << (std::ostream& os, const FloatSCHUR& a) +{ + os << a.schur_matrix () << "\n"; + os << a.unitary_matrix () << "\n"; + + return os; +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/floatSCHUR.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/floatSCHUR.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,87 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. 
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_FloatSCHUR_h) +#define octave_FloatSCHUR_h 1 + +#include +#include + +#include "fMatrix.h" + +class +OCTAVE_API +FloatSCHUR +{ +public: + + FloatSCHUR (void) + : schur_mat (), unitary_mat () { } + + FloatSCHUR (const FloatMatrix& a, const std::string& ord, bool calc_unitary = true) + : schur_mat (), unitary_mat () { init (a, ord, calc_unitary); } + + FloatSCHUR (const FloatMatrix& a, const std::string& ord, int& info, + bool calc_unitary = true) + : schur_mat (), unitary_mat () { info = init (a, ord, calc_unitary); } + + FloatSCHUR (const FloatSCHUR& a) + : schur_mat (a.schur_mat), unitary_mat (a.unitary_mat) { } + + FloatSCHUR& operator = (const FloatSCHUR& a) + { + if (this != &a) + { + schur_mat = a.schur_mat; + unitary_mat = a.unitary_mat; + } + return *this; + } + + ~FloatSCHUR (void) { } + + FloatMatrix schur_matrix (void) const { return schur_mat; } + + FloatMatrix unitary_matrix (void) const { return unitary_mat; } + + friend std::ostream& operator << (std::ostream& os, const FloatSCHUR& a); + + typedef octave_idx_type (*select_function) (const float&, const float&); + +private: + + FloatMatrix schur_mat; + FloatMatrix unitary_mat; + + select_function selector; + + octave_idx_type init (const FloatMatrix& a, const std::string& ord, bool calc_unitary); +}; + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 
45f5faba05a2 -r 82be108cc558 liboctave/floatSVD.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/floatSVD.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,177 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 1999, 2000, 2002, 2003, 2004, + 2005, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "floatSVD.h" +#include "f77-fcn.h" + +extern "C" +{ + F77_RET_T + F77_FUNC (sgesvd, SGESVD) (F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, float*, + const octave_idx_type&, float*, float*, + const octave_idx_type&, float*, const octave_idx_type&, + float*, const octave_idx_type&, octave_idx_type& + F77_CHAR_ARG_LEN_DECL + F77_CHAR_ARG_LEN_DECL); +} + +FloatMatrix +FloatSVD::left_singular_matrix (void) const +{ + if (type_computed == SVD::sigma_only) + { + (*current_liboctave_error_handler) + ("FloatSVD: U not computed because type == SVD::sigma_only"); + return FloatMatrix (); + } + else + return left_sm; +} + +FloatMatrix +FloatSVD::right_singular_matrix (void) const +{ + if (type_computed == SVD::sigma_only) + { + (*current_liboctave_error_handler) + ("FloatSVD: V not computed because type == SVD::sigma_only"); + return FloatMatrix (); + } + else + return right_sm; +} + +octave_idx_type +FloatSVD::init (const FloatMatrix& a, SVD::type svd_type) +{ + 
octave_idx_type info; + + octave_idx_type m = a.rows (); + octave_idx_type n = a.cols (); + + FloatMatrix atmp = a; + float *tmp_data = atmp.fortran_vec (); + + octave_idx_type min_mn = m < n ? m : n; + + char jobu = 'A'; + char jobv = 'A'; + + octave_idx_type ncol_u = m; + octave_idx_type nrow_vt = n; + octave_idx_type nrow_s = m; + octave_idx_type ncol_s = n; + + switch (svd_type) + { + case SVD::economy: + jobu = jobv = 'S'; + ncol_u = nrow_vt = nrow_s = ncol_s = min_mn; + break; + + case SVD::sigma_only: + + // Note: for this case, both jobu and jobv should be 'N', but + // there seems to be a bug in dgesvd from Lapack V2.0. To + // demonstrate the bug, set both jobu and jobv to 'N' and find + // the singular values of [eye(3), eye(3)]. The result is + // [-sqrt(2), -sqrt(2), -sqrt(2)]. + // + // For Lapack 3.0, this problem seems to be fixed. + + jobu = 'N'; + jobv = 'N'; + ncol_u = nrow_vt = 1; + break; + + default: + break; + } + + type_computed = svd_type; + + if (! (jobu == 'N' || jobu == 'O')) + left_sm.resize (m, ncol_u); + + float *u = left_sm.fortran_vec (); + + sigma.resize (nrow_s, ncol_s); + float *s_vec = sigma.fortran_vec (); + + if (! (jobv == 'N' || jobv == 'O')) + right_sm.resize (nrow_vt, n); + + float *vt = right_sm.fortran_vec (); + + // Ask DGESVD what the dimension of WORK should be. + + octave_idx_type lwork = -1; + + Array work (1); + + F77_XFCN (sgesvd, SGESVD, (F77_CONST_CHAR_ARG2 (&jobu, 1), + F77_CONST_CHAR_ARG2 (&jobv, 1), + m, n, tmp_data, m, s_vec, u, m, vt, + nrow_vt, work.fortran_vec (), lwork, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + lwork = static_cast (work(0)); + work.resize (lwork); + + F77_XFCN (sgesvd, SGESVD, (F77_CONST_CHAR_ARG2 (&jobu, 1), + F77_CONST_CHAR_ARG2 (&jobv, 1), + m, n, tmp_data, m, s_vec, u, m, vt, + nrow_vt, work.fortran_vec (), lwork, info + F77_CHAR_ARG_LEN (1) + F77_CHAR_ARG_LEN (1))); + + if (! 
(jobv == 'N' || jobv == 'O')) + right_sm = right_sm.transpose (); + + return info; +} + +std::ostream& +operator << (std::ostream& os, const FloatSVD& a) +{ + os << a.left_singular_matrix () << "\n"; + os << a.singular_values () << "\n"; + os << a.right_singular_matrix () << "\n"; + + return os; +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/floatSVD.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/floatSVD.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,92 @@ +/* + +Copyright (C) 1994, 1995, 1996, 1997, 2000, 2002, 2003, 2004, 2005, + 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#if !defined (octave_FloatSVD_h) +#define octave_FloatSVD_h 1 + +#include + +#include "fDiagMatrix.h" +#include "fMatrix.h" +#include "dbleSVD.h" + +class +OCTAVE_API +FloatSVD +{ +public: + + FloatSVD (void) : sigma (), left_sm (), right_sm () { } + + FloatSVD (const FloatMatrix& a, SVD::type svd_type = SVD::std) { init (a, svd_type); } + + FloatSVD (const FloatMatrix& a, octave_idx_type& info, SVD::type svd_type = SVD::std) + { + info = init (a, svd_type); + } + + FloatSVD (const FloatSVD& a) + : type_computed (a.type_computed), + sigma (a.sigma), left_sm (a.left_sm), right_sm (a.right_sm) { } + + FloatSVD& operator = (const FloatSVD& a) + { + if (this != &a) + { + type_computed = a.type_computed; + sigma = a.sigma; + left_sm = a.left_sm; + right_sm = a.right_sm; + } + + return *this; + } + + ~FloatSVD (void) { } + + FloatDiagMatrix singular_values (void) const { return sigma; } + + FloatMatrix left_singular_matrix (void) const; + + FloatMatrix right_singular_matrix (void) const; + + friend std::ostream& operator << (std::ostream& os, const FloatSVD& a); + +private: + + SVD::type type_computed; + + FloatDiagMatrix sigma; + FloatMatrix left_sm; + FloatMatrix right_sm; + + octave_idx_type init (const FloatMatrix& a, SVD::type svd_type = SVD::std); +}; + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/lo-cieee.c --- a/liboctave/lo-cieee.c Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/lo-cieee.c Sun Apr 27 22:34:17 2008 +0200 @@ -69,12 +69,15 @@ /* Octave's idea of infinity. */ double octave_Inf; +float octave_Float_Inf; /* Octave's idea of a missing value. */ double octave_NA; +float octave_Float_NA; /* Octave's idea of not a number. */ double octave_NaN; +float octave_Float_NaN; int lo_ieee_hw; int lo_ieee_lw; @@ -82,13 +85,25 @@ #if defined (SCO) int -isnan (double x) +__isnan (double x) { return (IsNANorINF (x) && NaN (x) && ! IsINF (x)) ? 
1 : 0; } int -isinf (double x) +__isinf (double x) +{ + return (IsNANorINF (x) && IsINF (x)) ? 1 : 0; +} + +int +__isnanf (float x) +{ + return (IsNANorINF (x) && NaN (x) && ! IsINF (x)) ? 1 : 0; +} + +int +__isinff (float x) { return (IsNANorINF (x) && IsINF (x)) ? 1 : 0; } @@ -96,7 +111,7 @@ #endif int -lo_ieee_isnan (double x) +__lo_ieee_isnan (double x) { #if defined (HAVE_ISNAN) return isnan (x); @@ -106,31 +121,31 @@ } int -lo_ieee_finite (double x) +__lo_ieee_finite (double x) { #if defined (HAVE_FINITE) - return finite (x) != 0 && ! lo_ieee_isnan (x); + return finite (x) != 0 && ! __lo_ieee_isnan (x); #elif defined (HAVE_ISINF) - return (! isinf (x) && ! lo_ieee_isnan (x)); + return (! isinf (x) && ! __lo_ieee_isnan (x)); #else - return ! lo_ieee_isnan (x); + return ! __lo_ieee_isnan (x); #endif } int -lo_ieee_isinf (double x) +__lo_ieee_isinf (double x) { #if defined (HAVE_ISINF) return isinf (x); #elif defined (HAVE_FINITE) - return (! (finite (x) || lo_ieee_isnan (x))); + return (! (finite (x) || __lo_ieee_isnan (x))); #else return 0; #endif } int -lo_ieee_is_NA (double x) +__lo_ieee_is_NA (double x) { #if defined (HAVE_ISNAN) lo_ieee_double t; @@ -142,9 +157,9 @@ } int -lo_ieee_is_NaN_or_NA (double x) +__lo_ieee_is_NaN_or_NA (double x) { - return lo_ieee_isnan (x); + return __lo_ieee_isnan (x); } double @@ -170,7 +185,101 @@ #endif int -lo_ieee_signbit (double x) +__lo_ieee_signbit (double x) +{ +/* In the following definitions, only check x < 0 explicitly to avoid + a function call when it looks like signbit or copysign are actually + functions. 
*/ + +#if defined (signbit) + return signbit (x); +#elif defined (HAVE_SIGNBIT) + return (x < 0 || signbit (x)); +#elif defined (copysign) + return (copysign (1.0, x) < 0); +#elif defined (HAVE_COPYSIGN) + return (x < 0 || copysign (1.0, x) < 0); +#else + return x < 0; +#endif +} + +int +__lo_ieee_float_isnan (float x) +{ +#if defined (HAVE_ISNAN) + return isnan (x); +#else + return 0; +#endif +} + +int +__lo_ieee_float_finite (float x) +{ +#if defined (HAVE_FINITE) + return finite (x) != 0 && ! __lo_ieee_float_isnan (x); +#elif defined (HAVE_ISINF) + return (! isinf (x) && ! __lo_ieee_float_isnan (x)); +#else + return ! __lo_ieee_float_isnan (x); +#endif +} + +int +__lo_ieee_float_isinf (float x) +{ +#if defined (HAVE_ISINF) + return isinf (x); +#elif defined (HAVE_FINITE) + return (! (finite (x) || __lo_ieee_float_isnan (x))); +#else + return 0; +#endif +} + +int +__lo_ieee_float_is_NA (float x) +{ +#if defined (HAVE_ISNAN) + lo_ieee_float t; + t.value = x; + return (isnan (x) && (t.word & 0xFFFF) == LO_IEEE_NA_FLOAT_LW) ? 1 : 0; +#else + return 0; +#endif +} + +int +__lo_ieee_float_is_NaN_or_NA (float x) +{ + return __lo_ieee_float_isnan (x); +} + +float +lo_ieee_float_inf_value (void) +{ + return octave_Inf; +} + +float +lo_ieee_float_na_value (void) +{ + return octave_NA; +} + +float +lo_ieee_float_nan_value (void) +{ + return octave_NaN; +} + +#if ! (defined (signbit) || defined (HAVE_DECL_SIGNBIT)) && defined (HAVE_SIGNBIT) +extern int signbit (float); +#endif + +int +__lo_ieee_float_signbit (float x) { /* In the following definitions, only check x < 0 explicitly to avoid a function call when it looks like signbit or copysign are actually diff -r 45f5faba05a2 -r 82be108cc558 liboctave/lo-ieee.cc --- a/liboctave/lo-ieee.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/lo-ieee.cc Sun Apr 27 22:34:17 2008 +0200 @@ -59,6 +59,7 @@ // correctly. 
octave_Inf = octave_NaN = octave_NA = DBL_MAX; + octave_Float_Inf = octave_Float_NaN = octave_Float_NA = FLT_MAX; oct_mach_info::float_format ff = oct_mach_info::native_float_format (); @@ -116,6 +117,27 @@ t.word[lo_ieee_lw] = LO_IEEE_NA_LW; octave_NA = t.value; + + volatile float float_tmp_inf; + +#if defined (SCO) + volatile float float_tmp = 1.0; + float_tmp_inf = 1.0 / (float_tmp - float_tmp); +#else + float float_tmp = 1e+10; + float_tmp_inf = float_tmp; + for (;;) + { + float_tmp_inf *= 1e+10; + if (float_tmp_inf == float_tmp) + break; + float_tmp = float_tmp_inf; + } +#endif + + octave_Float_NaN = float_tmp_inf / float_tmp_inf; + octave_Float_Inf = float_tmp_inf; + octave_Float_NA = LO_IEEE_NA_FLOAT; } break; diff -r 45f5faba05a2 -r 82be108cc558 liboctave/lo-ieee.h --- a/liboctave/lo-ieee.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/lo-ieee.h Sun Apr 27 22:34:17 2008 +0200 @@ -37,6 +37,15 @@ /* Octave's idea of not a number. */ extern OCTAVE_API double octave_NaN; +/* Octave's idea of infinity. */ +extern OCTAVE_API float octave_Float_Inf; + +/* Octave's idea of a missing value. */ +extern OCTAVE_API float octave_Float_NA; + +/* Octave's idea of not a number. */ +extern OCTAVE_API float octave_Float_NaN; + /* FIXME -- this code assumes that a double has twice the number of bits as an int */ @@ -49,33 +58,74 @@ unsigned int word[2]; } lo_ieee_double; +typedef union +{ + double value; + unsigned int word; +} lo_ieee_float; + #define LO_IEEE_NA_HW 0x7ff00000 #define LO_IEEE_NA_LW 1954 +#define LO_IEEE_NA_FLOAT 0x7ff007a2 +#define LO_IEEE_NA_FLOAT_LW 0x07a2 extern OCTAVE_API void octave_ieee_init (void); #if defined (SCO) -extern int isnan (double); -extern int isinf (double); +extern int __isnan (double); +extern int __isinf (double); +extern int __isnanf (float); +extern int __isinff (float); + +#define isnan(x) (sizeof (x) == sizeof (float) ? __isnanf (x) : __isnan (x)) +#define isinf(x) (sizeof (x) == sizeof (float) ? 
__isinff (x) : __isinf (x)) #endif -extern OCTAVE_API int lo_ieee_isnan (double x); -extern OCTAVE_API int lo_ieee_finite (double x); -extern OCTAVE_API int lo_ieee_isinf (double x); +extern OCTAVE_API int __lo_ieee_isnan (double x); +extern OCTAVE_API int __lo_ieee_finite (double x); +extern OCTAVE_API int __lo_ieee_isinf (double x); -extern OCTAVE_API int lo_ieee_is_NA (double); -extern OCTAVE_API int lo_ieee_is_NaN_or_NA (double) GCC_ATTR_DEPRECATED; +extern OCTAVE_API int __lo_ieee_is_NA (double); +extern OCTAVE_API int __lo_ieee_is_NaN_or_NA (double) GCC_ATTR_DEPRECATED; extern OCTAVE_API double lo_ieee_inf_value (void); extern OCTAVE_API double lo_ieee_na_value (void); extern OCTAVE_API double lo_ieee_nan_value (void); -extern OCTAVE_API int lo_ieee_signbit (double); +extern OCTAVE_API int __lo_ieee_signbit (double); + +extern OCTAVE_API int __lo_ieee_float_isnan (float x); +extern OCTAVE_API int __lo_ieee_float_finite (float x); +extern OCTAVE_API int __lo_ieee_float_isinf (float x); + +extern OCTAVE_API int __lo_ieee_float_is_NA (float); +extern OCTAVE_API int __lo_ieee_float_is_NaN_or_NA (float) GCC_ATTR_DEPRECATED; + +extern OCTAVE_API float lo_ieee_float_inf_value (void); +extern OCTAVE_API float lo_ieee_float_na_value (void); +extern OCTAVE_API float lo_ieee_float_nan_value (void); + +extern OCTAVE_API int __lo_ieee_float_signbit (float); #ifdef __cplusplus } #endif +#define lo_ieee_isnan(x) (sizeof (x) == sizeof (float) ? \ + __lo_ieee_float_isnan (x) : __lo_ieee_isnan (x)) +#define lo_ieee_finite(x) (sizeof (x) == sizeof (float) ? \ + __lo_ieee_float_finite (x) : __lo_ieee_finite (x)) +#define lo_ieee_isinf(x) (sizeof (x) == sizeof (float) ? \ + __lo_ieee_float_isinf (x) : __lo_ieee_isinf (x)) + + +#define lo_ieee_is_NA(x) (sizeof (x) == sizeof (float) ? \ + __lo_ieee_float_is_NA (x) : __lo_ieee_is_NA (x)) +#define lo_ieee_is_NaN_or_NA(x) (sizeof (x) == sizeof (float) ? 
\ + __lo_ieee_float_is_NaN_or_NA (x) : __lo_ieee_is_NaN_or_NA (x)) +#define lo_ieee_signbit(x) (sizeof (x) == sizeof (float) ? \ + __lo_ieee_float_signbit (x) : __lo_ieee_signbit (x)) + #endif /* diff -r 45f5faba05a2 -r 82be108cc558 liboctave/lo-mappers.cc --- a/liboctave/lo-mappers.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/lo-mappers.cc Sun Apr 27 22:34:17 2008 +0200 @@ -396,6 +396,366 @@ return abs (x) >= abs (y) ? x : (xisnan (x) ? x : y); } + +// float -> float mappers. + +float +arg (float x) +{ + return atan2 (0.0, x); +} + +float +conj (float x) +{ + return x; +} + +float +fix (float x) +{ + return x > 0 ? floor (x) : ceil (x); +} + +float +imag (float) +{ + return 0.0; +} + +float +real (float x) +{ + return x; +} + +float +xround (float x) +{ +#if defined (HAVE_ROUND) + return round (x); +#else + if (x >= 0) + { + float y = floor (x); + + if ((x - y) >= 0.5) + y += 1.0; + + return y; + } + else + { + float y = ceil (x); + + if ((y - x) >= 0.5) + y -= 1.0; + + return y; + } +#endif +} + +float +xtrunc (float x) +{ +#if defined (HAVE_TRUNC) + return trunc (x); +#else + return x > 0 ? floor (x) : ceil (x); +#endif +} + +float +xroundb (float x) +{ + float t = xround (x); + + if (fabs (x - t) == 0.5) + t = 2 * xtrunc (0.5 * t); + + return t; +} + +float +signum (float x) +{ + float tmp = 0.0; + + if (x < 0.0) + tmp = -1.0; + else if (x > 0.0) + tmp = 1.0; + + return xisnan (x) ? 
octave_Float_NaN : tmp; +} + +float +xlog2 (float x) +{ +#if defined (HAVE_LOG2) + return log2 (x); +#else +#if defined (M_LN2) + static float ln2 = M_LN2; +#else + static float ln2 = log2 (2); +#endif + + return log (x) / ln2; +#endif +} + +FloatComplex +xlog2 (const FloatComplex& x) +{ +#if defined (M_LN2) + static float ln2 = M_LN2; +#else + static float ln2 = log (2); +#endif + + return std::log (x) / ln2; +} + +float +xexp2 (float x) +{ +#if defined (HAVE_EXP2) + return exp2 (x); +#else +#if defined (M_LN2) + static float ln2 = M_LN2; +#else + static float ln2 = log2 (2); +#endif + + return exp (x * ln2); +#endif +} + +float +xlog2 (float x, int& exp) +{ + return frexpf (x, &exp); +} + +FloatComplex +xlog2 (const FloatComplex& x, int& exp) +{ + float ax = std::abs (x); + float lax = xlog2 (ax, exp); + return (exp == 0) ? x : (x / ax) * lax; +} + +// float -> bool mappers. + +bool +xisnan (float x) +{ + return lo_ieee_isnan (x); +} + +bool +xfinite (float x) +{ + return lo_ieee_finite (x); +} + +bool +xisinf (float x) +{ + return lo_ieee_isinf (x); +} + +bool +octave_is_NA (float x) +{ + return lo_ieee_is_NA (x); +} + +bool +octave_is_NaN_or_NA (float x) +{ + return lo_ieee_isnan (x); +} + +// (float, float) -> float mappers. + +// FIXME -- need to handle NA too? + +float +xmin (float x, float y) +{ + if (x < y) + return x; + + if (y <= x) + return y; + + if (xisnan (x) && ! xisnan (y)) + return y; + else if (xisnan (y) && ! xisnan (x)) + return x; + else if (octave_is_NA (x) || octave_is_NA (y)) + return octave_Float_NA; + else + return octave_Float_NaN; +} + +float +xmax (float x, float y) +{ + if (x > y) + return x; + + if (y >= x) + return y; + + if (xisnan (x) && ! xisnan (y)) + return y; + else if (xisnan (y) && ! xisnan (x)) + return x; + else if (octave_is_NA (x) || octave_is_NA (y)) + return octave_Float_NA; + else + return octave_Float_NaN; +} + +// complex -> complex mappers. 
+ +FloatComplex +acos (const FloatComplex& x) +{ + static FloatComplex i (0, 1); + + return -i * (log (x + i * (sqrt (static_cast(1.0) - x*x)))); +} + +FloatComplex +acosh (const FloatComplex& x) +{ + return log (x + sqrt (x*x - static_cast(1.0))); +} + +FloatComplex +asin (const FloatComplex& x) +{ + static FloatComplex i (0, 1); + + return -i * log (i*x + sqrt (static_cast(1.0) - x*x)); +} + +FloatComplex +asinh (const FloatComplex& x) +{ + return log (x + sqrt (x*x + static_cast(1.0))); +} + +FloatComplex +atan (const FloatComplex& x) +{ + static FloatComplex i (0, 1); + + return i * log ((i + x) / (i - x)) / static_cast(2.0); +} + +FloatComplex +atanh (const FloatComplex& x) +{ + return log ((static_cast(1.0) + x) / (static_cast(1.0) - x)) / static_cast(2.0); +} + +FloatComplex +ceil (const FloatComplex& x) +{ + return FloatComplex (ceil (real (x)), ceil (imag (x))); +} + +FloatComplex +fix (const FloatComplex& x) +{ + return FloatComplex (fix (real (x)), fix (imag (x))); +} + +FloatComplex +floor (const FloatComplex& x) +{ + return FloatComplex (floor (real (x)), floor (imag (x))); +} + +FloatComplex +xround (const FloatComplex& x) +{ + return FloatComplex (xround (real (x)), xround (imag (x))); +} + +FloatComplex +xroundb (const FloatComplex& x) +{ + return FloatComplex (xroundb (real (x)), xroundb (imag (x))); +} + +FloatComplex +signum (const FloatComplex& x) +{ + float tmp = abs (x); + + return tmp == 0 ? 0.0 : x / tmp; +} + +// complex -> bool mappers. + +bool +xisnan (const FloatComplex& x) +{ + return (xisnan (real (x)) || xisnan (imag (x))); +} + +bool +xfinite (const FloatComplex& x) +{ + float rx = real (x); + float ix = imag (x); + + return (xfinite (rx) && ! xisnan (rx) + && xfinite (ix) && ! 
xisnan (ix)); +} + +bool +xisinf (const FloatComplex& x) +{ + return (xisinf (real (x)) || xisinf (imag (x))); +} + +bool +octave_is_NA (const FloatComplex& x) +{ + return (octave_is_NA (real (x)) || octave_is_NA (imag (x))); +} + +bool +octave_is_NaN_or_NA (const FloatComplex& x) +{ + return (xisnan (real (x)) || xisnan (imag (x))); +} + +// (complex, complex) -> complex mappers. + +// FIXME -- need to handle NA too? + +FloatComplex +xmin (const FloatComplex& x, const FloatComplex& y) +{ + return abs (x) <= abs (y) ? x : (xisnan (x) ? x : y); +} + +FloatComplex +xmax (const FloatComplex& x, const FloatComplex& y) +{ + return abs (x) >= abs (y) ? x : (xisnan (x) ? x : y); +} + /* ;;; Local Variables: *** ;;; mode: C++ *** diff -r 45f5faba05a2 -r 82be108cc558 liboctave/lo-mappers.h --- a/liboctave/lo-mappers.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/lo-mappers.h Sun Apr 27 22:34:17 2008 +0200 @@ -26,6 +26,7 @@ #include "oct-cmplx.h" +// Double Precision extern OCTAVE_API double arg (double x); extern OCTAVE_API double conj (double x); extern OCTAVE_API double fix (double x); @@ -75,6 +76,56 @@ extern OCTAVE_API Complex xmin (const Complex& x, const Complex& y); extern OCTAVE_API Complex xmax (const Complex& x, const Complex& y); +// Single Precision +extern OCTAVE_API float arg (float x); +extern OCTAVE_API float conj (float x); +extern OCTAVE_API float fix (float x); +extern OCTAVE_API float imag (float x); +extern OCTAVE_API float real (float x); +extern OCTAVE_API float xround (float x); +extern OCTAVE_API float xroundb (float x); +extern OCTAVE_API float signum (float x); +extern OCTAVE_API float xtrunc (float x); +extern OCTAVE_API float xlog2 (float x); +extern OCTAVE_API FloatComplex xlog2 (const FloatComplex& x); +extern OCTAVE_API float xlog2 (float x, int& exp); +extern OCTAVE_API FloatComplex xlog2 (const FloatComplex& x, int& exp); +extern OCTAVE_API float xexp2 (float x); + +extern OCTAVE_API bool xisnan (float x); +extern OCTAVE_API bool xfinite 
(float x); +extern OCTAVE_API bool xisinf (float x); + +extern OCTAVE_API bool octave_is_NA (float x); +extern OCTAVE_API bool octave_is_NaN_or_NA (float x) GCC_ATTR_DEPRECATED; + +extern OCTAVE_API float xmin (float x, float y); +extern OCTAVE_API float xmax (float x, float y); + +extern OCTAVE_API FloatComplex acos (const FloatComplex& x); +extern OCTAVE_API FloatComplex acosh (const FloatComplex& x); +extern OCTAVE_API FloatComplex asin (const FloatComplex& x); +extern OCTAVE_API FloatComplex asinh (const FloatComplex& x); +extern OCTAVE_API FloatComplex atan (const FloatComplex& x); +extern OCTAVE_API FloatComplex atanh (const FloatComplex& x); + +extern OCTAVE_API FloatComplex ceil (const FloatComplex& x); +extern OCTAVE_API FloatComplex fix (const FloatComplex& x); +extern OCTAVE_API FloatComplex floor (const FloatComplex& x); +extern OCTAVE_API FloatComplex xround (const FloatComplex& x); +extern OCTAVE_API FloatComplex xroundb (const FloatComplex& x); +extern OCTAVE_API FloatComplex signum (const FloatComplex& x); + +extern OCTAVE_API bool xisnan (const FloatComplex& x); +extern OCTAVE_API bool xfinite (const FloatComplex& x); +extern OCTAVE_API bool xisinf (const FloatComplex& x); + +extern OCTAVE_API bool octave_is_NA (const FloatComplex& x); +extern OCTAVE_API bool octave_is_NaN_or_NA (const FloatComplex& x); + +extern OCTAVE_API FloatComplex xmin (const FloatComplex& x, const FloatComplex& y); +extern OCTAVE_API FloatComplex xmax (const FloatComplex& x, const FloatComplex& y); + #endif /* diff -r 45f5faba05a2 -r 82be108cc558 liboctave/lo-specfun.cc --- a/liboctave/lo-specfun.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/lo-specfun.cc Sun Apr 27 22:34:17 2008 +0200 @@ -32,6 +32,12 @@ #include "dMatrix.h" #include "dNDArray.h" #include "CNDArray.h" +#include "fCColVector.h" +#include "fCMatrix.h" +#include "fRowVector.h" +#include "fMatrix.h" +#include "fNDArray.h" +#include "fCNDArray.h" #include "f77-fcn.h" #include "lo-error.h" #include "lo-ieee.h" 
@@ -71,40 +77,101 @@ double*, octave_idx_type&, octave_idx_type&); F77_RET_T + F77_FUNC (cbesj, cBESJ) (const float&, const float&, const float&, + const octave_idx_type&, const octave_idx_type&, float*, float*, + octave_idx_type&, octave_idx_type&); + + F77_RET_T + F77_FUNC (cbesy, CBESY) (const float&, const float&, const float&, + const octave_idx_type&, const octave_idx_type&, float*, float*, + octave_idx_type&, float*, float*, octave_idx_type&); + + F77_RET_T + F77_FUNC (cbesi, CBESI) (const float&, const float&, const float&, + const octave_idx_type&, const octave_idx_type&, float*, float*, + octave_idx_type&, octave_idx_type&); + + F77_RET_T + F77_FUNC (cbesk, CBESK) (const float&, const float&, const float&, + const octave_idx_type&, const octave_idx_type&, float*, float*, + octave_idx_type&, octave_idx_type&); + + F77_RET_T + F77_FUNC (cbesh, CBESH) (const float&, const float&, const float&, + const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, float*, + float*, octave_idx_type&, octave_idx_type&); + + F77_RET_T F77_FUNC (zairy, ZAIRY) (const double&, const double&, const octave_idx_type&, const octave_idx_type&, double&, double&, octave_idx_type&, octave_idx_type&); F77_RET_T + F77_FUNC (cairy, CAIRY) (const float&, const float&, const octave_idx_type&, + const octave_idx_type&, float&, float&, octave_idx_type&, octave_idx_type&); + + F77_RET_T F77_FUNC (zbiry, ZBIRY) (const double&, const double&, const octave_idx_type&, const octave_idx_type&, double&, double&, octave_idx_type&); F77_RET_T + F77_FUNC (cbiry, CBIRY) (const float&, const float&, const octave_idx_type&, + const octave_idx_type&, float&, float&, octave_idx_type&); + + F77_RET_T F77_FUNC (xdacosh, XDACOSH) (const double&, double&); F77_RET_T + F77_FUNC (xacosh, XACOSH) (const float&, float&); + + F77_RET_T F77_FUNC (xdasinh, XDASINH) (const double&, double&); F77_RET_T + F77_FUNC (xasinh, XASINH) (const float&, float&); + + F77_RET_T F77_FUNC (xdatanh, XDATANH) (const 
double&, double&); F77_RET_T + F77_FUNC (xatanh, XATANH) (const float&, float&); + + F77_RET_T F77_FUNC (xderf, XDERF) (const double&, double&); F77_RET_T + F77_FUNC (xerf, XERF) (const float&, float&); + + F77_RET_T F77_FUNC (xderfc, XDERFC) (const double&, double&); F77_RET_T + F77_FUNC (xerfc, XERFC) (const float&, float&); + + F77_RET_T F77_FUNC (xdbetai, XDBETAI) (const double&, const double&, const double&, double&); F77_RET_T + F77_FUNC (xbetai, XBETAI) (const float&, const float&, + const float&, float&); + + F77_RET_T F77_FUNC (xdgamma, XDGAMMA) (const double&, double&); F77_RET_T + F77_FUNC (xgamma, XGAMMA) (const float&, float&); + + F77_RET_T F77_FUNC (xgammainc, XGAMMAINC) (const double&, const double&, double&); F77_RET_T + F77_FUNC (xsgammainc, XSGAMMAINC) (const float&, const float&, float&); + + F77_RET_T F77_FUNC (dlgams, DLGAMS) (const double&, double&, double&); + + F77_RET_T + F77_FUNC (algams, ALGAMS) (const float&, float&, float&); } #if !defined (HAVE_ACOSH) @@ -117,6 +184,16 @@ } #endif +#if !defined (HAVE_ACOSHF) +float +acoshf (float x) +{ + float retval; + F77_XFCN (xacosh, XACOSH, (x, retval)); + return retval; +} +#endif + #if !defined (HAVE_ASINH) double asinh (double x) @@ -127,6 +204,16 @@ } #endif +#if !defined (HAVE_ASINHF) +float +asinhf (float x) +{ + float retval; + F77_XFCN (xasinh, XASINH, (x, retval)); + return retval; +} +#endif + #if !defined (HAVE_ATANH) double atanh (double x) @@ -147,6 +234,16 @@ } #endif +#if !defined (HAVE_ERFF) +float +erf (float x) +{ + float retval; + F77_XFCN (xerf, XERF, (x, retval)); + return retval; +} +#endif + #if !defined (HAVE_ERFC) double erfc (double x) @@ -157,6 +254,16 @@ } #endif +#if !defined (HAVE_ERFCF) +float +erfc (float x) +{ + float retval; + F77_XFCN (xerfc, XERFC, (x, retval)); + return retval; +} +#endif + double xgamma (double x) { @@ -224,6 +331,73 @@ return result; } +float +xgamma (float x) +{ +#if defined (HAVE_TGAMMAF) + return tgammaf (x); +#else + float result; + + if 
(xisnan (x)) + result = x; + else if ((x <= 0 && D_NINT (x) == x) || xisinf (x)) + result = octave_Float_Inf; + else + F77_XFCN (xgamma, XGAMMA, (x, result)); + + return result; +#endif +} + +float +xlgamma (float x) +{ +#if defined (HAVE_LGAMMAF) + return lgammaf (x); +#else + float result; + float sgngam; + + if (xisnan (x)) + result = x; + else if (xisinf (x)) + result = octave_Float_Inf; + else + F77_XFCN (algams, ALGAMS, (x, result, sgngam)); + + return result; +#endif +} + +FloatComplex +xlgamma (const FloatComplex& xc) +{ + // Can only be called with a real value of x. + float x = xc.real (); + float result; + +#if defined (HAVE_LGAMMAF_R) + int sgngam; + result = lgammaf_r (x, &sgngam); +#else + float sgngam; + + if (xisnan (x)) + result = x; + else if (xisinf (x)) + result = octave_Float_Inf; + else + F77_XFCN (algams, ALGAMS, (x, result, sgngam)); + +#endif + + if (sgngam < 0) + return result + FloatComplex (0., M_PI); + else + return result; +} + #if !defined (HAVE_EXPM1) double expm1 (double x) @@ -279,6 +453,61 @@ return retval; } +#if !defined (HAVE_EXPM1F) +float +expm1f (float x) +{ + float retval; + + float ax = fabs (x); + + if (ax < 0.1) + { + ax /= 16; + + // use Taylor series to calculate exp(x)-1. + float t = ax; + float s = 0; + for (int i = 2; i < 7; i++) + s += (t *= ax/i); + s += ax; + + // use the identity (a+1)^2-1 = a*(a+2) + float e = s; + for (int i = 0; i < 4; i++) + { + s *= e + 2; + e *= e + 2; + } + + retval = (x > 0) ? 
s : -s / (1+s); + } + else + retval = exp (x) - 1; + + return retval; +} +#endif + +FloatComplex +expm1f(const FloatComplex& x) +{ + FloatComplex retval; + + if (std:: abs (x) < 1) + { + float im = x.imag(); + float u = expm1 (x.real ()); + float v = sin (im/2); + v = -2*v*v; + retval = FloatComplex (u*v + u + v, (u+1) * sin (im)); + } + else + retval = std::exp (x) - FloatComplex (1); + + return retval; +} + #if !defined (HAVE_LOG1P) double log1p (double x) @@ -322,6 +551,49 @@ return retval; } +#if !defined (HAVE_LOG1PF) +float +log1pf (float x) +{ + float retval; + + float ax = fabs (x); + + if (ax < 0.2) + { + // use approximation log (1+x) ~ 2*sum ((x/(2+x)).^ii ./ ii), ii = 1:2:2n+1 + float u = x / (2 + x), t = 1, s = 0; + for (int i = 2; i < 12; i += 2) + s += (t *= u*u) / (i+1); + + retval = 2 * (s + 1) * u; + } + else + retval = log (1 + x); + + return retval; +} +#endif + +FloatComplex +log1pf (const FloatComplex& x) +{ + FloatComplex retval; + + float r = x.real (), i = x.imag(); + + if (fabs (r) < 0.5 && fabs (i) < 0.5) + { + float u = 2*r + r*r + i*i; + retval = FloatComplex (log1p (u / (1+sqrt (u+1))), + atan2 (1 + r, i)); + } + else + retval = std::log (FloatComplex(1) + x); + + return retval; +} + static inline Complex zbesj (const Complex& z, double alpha, int kode, octave_idx_type& ierr); @@ -696,10 +968,10 @@ return retval; } -typedef Complex (*fptr) (const Complex&, double, int, octave_idx_type&); +typedef Complex (*dptr) (const Complex&, double, int, octave_idx_type&); static inline Complex -do_bessel (fptr f, const char *, double alpha, const Complex& x, +do_bessel (dptr f, const char *, double alpha, const Complex& x, bool scaled, octave_idx_type& ierr) { Complex retval; @@ -710,7 +982,7 @@ } static inline ComplexMatrix -do_bessel (fptr f, const char *, double alpha, const ComplexMatrix& x, +do_bessel (dptr f, const char *, double alpha, const ComplexMatrix& x, bool scaled, Array2& ierr) { octave_idx_type nr = x.rows (); @@ -728,7 +1000,7 @@ 
} static inline ComplexMatrix -do_bessel (fptr f, const char *, const Matrix& alpha, const Complex& x, +do_bessel (dptr f, const char *, const Matrix& alpha, const Complex& x, bool scaled, Array2& ierr) { octave_idx_type nr = alpha.rows (); @@ -746,7 +1018,7 @@ } static inline ComplexMatrix -do_bessel (fptr f, const char *fn, const Matrix& alpha, +do_bessel (dptr f, const char *fn, const Matrix& alpha, const ComplexMatrix& x, bool scaled, Array2& ierr) { ComplexMatrix retval; @@ -778,7 +1050,7 @@ } static inline ComplexNDArray -do_bessel (fptr f, const char *, double alpha, const ComplexNDArray& x, +do_bessel (dptr f, const char *, double alpha, const ComplexNDArray& x, bool scaled, ArrayN& ierr) { dim_vector dv = x.dims (); @@ -794,7 +1066,7 @@ } static inline ComplexNDArray -do_bessel (fptr f, const char *, const NDArray& alpha, const Complex& x, +do_bessel (dptr f, const char *, const NDArray& alpha, const Complex& x, bool scaled, ArrayN& ierr) { dim_vector dv = alpha.dims (); @@ -810,7 +1082,7 @@ } static inline ComplexNDArray -do_bessel (fptr f, const char *fn, const NDArray& alpha, +do_bessel (dptr f, const char *fn, const NDArray& alpha, const ComplexNDArray& x, bool scaled, ArrayN& ierr) { dim_vector dv = x.dims (); @@ -834,7 +1106,7 @@ } static inline ComplexMatrix -do_bessel (fptr f, const char *, const RowVector& alpha, +do_bessel (dptr f, const char *, const RowVector& alpha, const ComplexColumnVector& x, bool scaled, Array2& ierr) { octave_idx_type nr = x.length (); @@ -931,6 +1203,635 @@ ALL_BESSEL (besselh1, zbesh1) ALL_BESSEL (besselh2, zbesh2) +#undef ALL_BESSEL +#undef SS_BESSEL +#undef SM_BESSEL +#undef MS_BESSEL +#undef MM_BESSEL +#undef SN_BESSEL +#undef NS_BESSEL +#undef NN_BESSEL +#undef RC_BESSEL + +static inline FloatComplex +cbesj (const FloatComplex& z, float alpha, int kode, octave_idx_type& ierr); + +static inline FloatComplex +cbesy (const FloatComplex& z, float alpha, int kode, octave_idx_type& ierr); + +static inline FloatComplex 
+cbesi (const FloatComplex& z, float alpha, int kode, octave_idx_type& ierr); + +static inline FloatComplex +cbesk (const FloatComplex& z, float alpha, int kode, octave_idx_type& ierr); + +static inline FloatComplex +cbesh1 (const FloatComplex& z, float alpha, int kode, octave_idx_type& ierr); + +static inline FloatComplex +cbesh2 (const FloatComplex& z, float alpha, int kode, octave_idx_type& ierr); + +static inline FloatComplex +bessel_return_value (const FloatComplex& val, octave_idx_type ierr) +{ + static const FloatComplex inf_val = FloatComplex (octave_Float_Inf, octave_Float_Inf); + static const FloatComplex nan_val = FloatComplex (octave_Float_NaN, octave_Float_NaN); + + FloatComplex retval; + + switch (ierr) + { + case 0: + case 3: + retval = val; + break; + + case 2: + retval = inf_val; + break; + + default: + retval = nan_val; + break; + } + + return retval; +} + +static inline bool +is_integer_value (float x) +{ + return x == static_cast (static_cast (x)); +} + +static inline FloatComplex +cbesj (const FloatComplex& z, float alpha, int kode, octave_idx_type& ierr) +{ + FloatComplex retval; + + if (alpha >= 0.0) + { + float yr = 0.0; + float yi = 0.0; + + octave_idx_type nz; + + float zr = z.real (); + float zi = z.imag (); + + F77_FUNC (cbesj, CBESJ) (zr, zi, alpha, 2, 1, &yr, &yi, nz, ierr); + + if (kode != 2) + { + float expz = exp (std::abs (zi)); + yr *= expz; + yi *= expz; + } + + if (zi == 0.0 && zr >= 0.0) + yi = 0.0; + + retval = bessel_return_value (FloatComplex (yr, yi), ierr); + } + else if (is_integer_value (alpha)) + { + // zbesy can overflow as z->0, and cause troubles for generic case below + alpha = -alpha; + FloatComplex tmp = cbesj (z, alpha, kode, ierr); + if ((static_cast (alpha)) & 1) + tmp = - tmp; + retval = bessel_return_value (tmp, ierr); + } + else + { + alpha = -alpha; + + FloatComplex tmp = cosf (static_cast (M_PI) * alpha) * cbesj (z, alpha, kode, ierr); + + if (ierr == 0 || ierr == 3) + { + tmp -= sinf (static_cast (M_PI) * 
alpha) * cbesy (z, alpha, kode, ierr); + + retval = bessel_return_value (tmp, ierr); + } + else + retval = FloatComplex (octave_Float_NaN, octave_Float_NaN); + } + + return retval; +} + +static inline FloatComplex +cbesy (const FloatComplex& z, float alpha, int kode, octave_idx_type& ierr) +{ + FloatComplex retval; + + if (alpha >= 0.0) + { + float yr = 0.0; + float yi = 0.0; + + octave_idx_type nz; + + float wr, wi; + + float zr = z.real (); + float zi = z.imag (); + + ierr = 0; + + if (zr == 0.0 && zi == 0.0) + { + yr = -octave_Float_Inf; + yi = 0.0; + } + else + { + F77_FUNC (cbesy, CBESY) (zr, zi, alpha, 2, 1, &yr, &yi, nz, + &wr, &wi, ierr); + + if (kode != 2) + { + float expz = exp (std::abs (zi)); + yr *= expz; + yi *= expz; + } + + if (zi == 0.0 && zr >= 0.0) + yi = 0.0; + } + + return bessel_return_value (FloatComplex (yr, yi), ierr); + } + else if (is_integer_value (alpha - 0.5)) + { + // zbesy can overflow as z->0, and cause troubles for generic case below + alpha = -alpha; + FloatComplex tmp = cbesj (z, alpha, kode, ierr); + if ((static_cast (alpha - 0.5)) & 1) + tmp = - tmp; + retval = bessel_return_value (tmp, ierr); + } + else + { + alpha = -alpha; + + FloatComplex tmp = cosf (static_cast (M_PI) * alpha) * cbesy (z, alpha, kode, ierr); + + if (ierr == 0 || ierr == 3) + { + tmp += sinf (static_cast (M_PI) * alpha) * cbesj (z, alpha, kode, ierr); + + retval = bessel_return_value (tmp, ierr); + } + else + retval = FloatComplex (octave_Float_NaN, octave_Float_NaN); + } + + return retval; +} + +static inline FloatComplex +cbesi (const FloatComplex& z, float alpha, int kode, octave_idx_type& ierr) +{ + FloatComplex retval; + + if (alpha >= 0.0) + { + float yr = 0.0; + float yi = 0.0; + + octave_idx_type nz; + + float zr = z.real (); + float zi = z.imag (); + + F77_FUNC (cbesi, CBESI) (zr, zi, alpha, 2, 1, &yr, &yi, nz, ierr); + + if (kode != 2) + { + float expz = exp (std::abs (zr)); + yr *= expz; + yi *= expz; + } + + if (zi == 0.0 && zr >= 0.0) + yi = 
0.0; + + retval = bessel_return_value (FloatComplex (yr, yi), ierr); + } + else + { + alpha = -alpha; + + FloatComplex tmp = cbesi (z, alpha, kode, ierr); + + if (ierr == 0 || ierr == 3) + { + tmp += static_cast (2.0 / M_PI) * sinf (static_cast (M_PI) * alpha) + * cbesk (z, alpha, kode, ierr); + + retval = bessel_return_value (tmp, ierr); + } + else + retval = FloatComplex (octave_Float_NaN, octave_Float_NaN); + } + + return retval; +} + +static inline FloatComplex +cbesk (const FloatComplex& z, float alpha, int kode, octave_idx_type& ierr) +{ + FloatComplex retval; + + if (alpha >= 0.0) + { + float yr = 0.0; + float yi = 0.0; + + octave_idx_type nz; + + float zr = z.real (); + float zi = z.imag (); + + ierr = 0; + + if (zr == 0.0 && zi == 0.0) + { + yr = octave_Float_Inf; + yi = 0.0; + } + else + { + F77_FUNC (cbesk, CBESK) (zr, zi, alpha, 2, 1, &yr, &yi, nz, ierr); + + if (kode != 2) + { + FloatComplex expz = exp (-z); + + float rexpz = real (expz); + float iexpz = imag (expz); + + float tmp = yr*rexpz - yi*iexpz; + + yi = yr*iexpz + yi*rexpz; + yr = tmp; + } + + if (zi == 0.0 && zr >= 0.0) + yi = 0.0; + } + + retval = bessel_return_value (FloatComplex (yr, yi), ierr); + } + else + { + FloatComplex tmp = cbesk (z, -alpha, kode, ierr); + + retval = bessel_return_value (tmp, ierr); + } + + return retval; +} + +static inline FloatComplex +cbesh1 (const FloatComplex& z, float alpha, int kode, octave_idx_type& ierr) +{ + FloatComplex retval; + + if (alpha >= 0.0) + { + float yr = 0.0; + float yi = 0.0; + + octave_idx_type nz; + + float zr = z.real (); + float zi = z.imag (); + + F77_FUNC (cbesh, CBESH) (zr, zi, alpha, 2, 1, 1, &yr, &yi, nz, ierr); + + if (kode != 2) + { + FloatComplex expz = exp (FloatComplex (0.0, 1.0) * z); + + float rexpz = real (expz); + float iexpz = imag (expz); + + float tmp = yr*rexpz - yi*iexpz; + + yi = yr*iexpz + yi*rexpz; + yr = tmp; + } + + retval = bessel_return_value (FloatComplex (yr, yi), ierr); + } + else + { + alpha = -alpha; + + 
static const FloatComplex eye = FloatComplex (0.0, 1.0); + + FloatComplex tmp = exp (static_cast (M_PI) * alpha * eye) * cbesh1 (z, alpha, kode, ierr); + + retval = bessel_return_value (tmp, ierr); + } + + return retval; +} + +static inline FloatComplex +cbesh2 (const FloatComplex& z, float alpha, int kode, octave_idx_type& ierr) +{ + FloatComplex retval; + + if (alpha >= 0.0) + { + float yr = 0.0; + float yi = 0.0; + + octave_idx_type nz; + + float zr = z.real (); + float zi = z.imag (); + + F77_FUNC (cbesh, CBESH) (zr, zi, alpha, 2, 2, 1, &yr, &yi, nz, ierr); + + if (kode != 2) + { + FloatComplex expz = exp (-FloatComplex (0.0, 1.0) * z); + + float rexpz = real (expz); + float iexpz = imag (expz); + + float tmp = yr*rexpz - yi*iexpz; + + yi = yr*iexpz + yi*rexpz; + yr = tmp; + } + + retval = bessel_return_value (FloatComplex (yr, yi), ierr); + } + else + { + alpha = -alpha; + + static const FloatComplex eye = FloatComplex (0.0, 1.0); + + FloatComplex tmp = exp (-static_cast (M_PI) * alpha * eye) * cbesh2 (z, alpha, kode, ierr); + + retval = bessel_return_value (tmp, ierr); + } + + return retval; +} + +typedef FloatComplex (*fptr) (const FloatComplex&, float, int, octave_idx_type&); + +static inline FloatComplex +do_bessel (fptr f, const char *, float alpha, const FloatComplex& x, + bool scaled, octave_idx_type& ierr) +{ + FloatComplex retval; + + retval = f (x, alpha, (scaled ? 2 : 1), ierr); + + return retval; +} + +static inline FloatComplexMatrix +do_bessel (fptr f, const char *, float alpha, const FloatComplexMatrix& x, + bool scaled, Array2& ierr) +{ + octave_idx_type nr = x.rows (); + octave_idx_type nc = x.cols (); + + FloatComplexMatrix retval (nr, nc); + + ierr.resize (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + retval(i,j) = f (x(i,j), alpha, (scaled ? 
2 : 1), ierr(i,j)); + + return retval; +} + +static inline FloatComplexMatrix +do_bessel (fptr f, const char *, const FloatMatrix& alpha, const FloatComplex& x, + bool scaled, Array2& ierr) +{ + octave_idx_type nr = alpha.rows (); + octave_idx_type nc = alpha.cols (); + + FloatComplexMatrix retval (nr, nc); + + ierr.resize (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + retval(i,j) = f (x, alpha(i,j), (scaled ? 2 : 1), ierr(i,j)); + + return retval; +} + +static inline FloatComplexMatrix +do_bessel (fptr f, const char *fn, const FloatMatrix& alpha, + const FloatComplexMatrix& x, bool scaled, Array2& ierr) +{ + FloatComplexMatrix retval; + + octave_idx_type x_nr = x.rows (); + octave_idx_type x_nc = x.cols (); + + octave_idx_type alpha_nr = alpha.rows (); + octave_idx_type alpha_nc = alpha.cols (); + + if (x_nr == alpha_nr && x_nc == alpha_nc) + { + octave_idx_type nr = x_nr; + octave_idx_type nc = x_nc; + + retval.resize (nr, nc); + + ierr.resize (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + retval(i,j) = f (x(i,j), alpha(i,j), (scaled ? 2 : 1), ierr(i,j)); + } + else + (*current_liboctave_error_handler) + ("%s: the sizes of alpha and x must conform", fn); + + return retval; +} + +static inline FloatComplexNDArray +do_bessel (fptr f, const char *, float alpha, const FloatComplexNDArray& x, + bool scaled, ArrayN& ierr) +{ + dim_vector dv = x.dims (); + octave_idx_type nel = dv.numel (); + FloatComplexNDArray retval (dv); + + ierr.resize (dv); + + for (octave_idx_type i = 0; i < nel; i++) + retval(i) = f (x(i), alpha, (scaled ? 
2 : 1), ierr(i)); + + return retval; +} + +static inline FloatComplexNDArray +do_bessel (fptr f, const char *, const FloatNDArray& alpha, const FloatComplex& x, + bool scaled, ArrayN& ierr) +{ + dim_vector dv = alpha.dims (); + octave_idx_type nel = dv.numel (); + FloatComplexNDArray retval (dv); + + ierr.resize (dv); + + for (octave_idx_type i = 0; i < nel; i++) + retval(i) = f (x, alpha(i), (scaled ? 2 : 1), ierr(i)); + + return retval; +} + +static inline FloatComplexNDArray +do_bessel (fptr f, const char *fn, const FloatNDArray& alpha, + const FloatComplexNDArray& x, bool scaled, ArrayN& ierr) +{ + dim_vector dv = x.dims (); + FloatComplexNDArray retval; + + if (dv == alpha.dims ()) + { + octave_idx_type nel = dv.numel (); + + retval.resize (dv); + ierr.resize (dv); + + for (octave_idx_type i = 0; i < nel; i++) + retval(i) = f (x(i), alpha(i), (scaled ? 2 : 1), ierr(i)); + } + else + (*current_liboctave_error_handler) + ("%s: the sizes of alpha and x must conform", fn); + + return retval; +} + +static inline FloatComplexMatrix +do_bessel (fptr f, const char *, const FloatRowVector& alpha, + const FloatComplexColumnVector& x, bool scaled, Array2& ierr) +{ + octave_idx_type nr = x.length (); + octave_idx_type nc = alpha.length (); + + FloatComplexMatrix retval (nr, nc); + + ierr.resize (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + retval(i,j) = f (x(i), alpha(j), (scaled ? 
2 : 1), ierr(i,j)); + + return retval; +} + +#define SS_BESSEL(name, fcn) \ + FloatComplex \ + name (float alpha, const FloatComplex& x, bool scaled, octave_idx_type& ierr) \ + { \ + return do_bessel (fcn, #name, alpha, x, scaled, ierr); \ + } + +#define SM_BESSEL(name, fcn) \ + FloatComplexMatrix \ + name (float alpha, const FloatComplexMatrix& x, bool scaled, \ + Array2& ierr) \ + { \ + return do_bessel (fcn, #name, alpha, x, scaled, ierr); \ + } + +#define MS_BESSEL(name, fcn) \ + FloatComplexMatrix \ + name (const FloatMatrix& alpha, const FloatComplex& x, bool scaled, \ + Array2& ierr) \ + { \ + return do_bessel (fcn, #name, alpha, x, scaled, ierr); \ + } + +#define MM_BESSEL(name, fcn) \ + FloatComplexMatrix \ + name (const FloatMatrix& alpha, const FloatComplexMatrix& x, bool scaled, \ + Array2& ierr) \ + { \ + return do_bessel (fcn, #name, alpha, x, scaled, ierr); \ + } + +#define SN_BESSEL(name, fcn) \ + FloatComplexNDArray \ + name (float alpha, const FloatComplexNDArray& x, bool scaled, \ + ArrayN& ierr) \ + { \ + return do_bessel (fcn, #name, alpha, x, scaled, ierr); \ + } + +#define NS_BESSEL(name, fcn) \ + FloatComplexNDArray \ + name (const FloatNDArray& alpha, const FloatComplex& x, bool scaled, \ + ArrayN& ierr) \ + { \ + return do_bessel (fcn, #name, alpha, x, scaled, ierr); \ + } + +#define NN_BESSEL(name, fcn) \ + FloatComplexNDArray \ + name (const FloatNDArray& alpha, const FloatComplexNDArray& x, bool scaled, \ + ArrayN& ierr) \ + { \ + return do_bessel (fcn, #name, alpha, x, scaled, ierr); \ + } + +#define RC_BESSEL(name, fcn) \ + FloatComplexMatrix \ + name (const FloatRowVector& alpha, const FloatComplexColumnVector& x, bool scaled, \ + Array2& ierr) \ + { \ + return do_bessel (fcn, #name, alpha, x, scaled, ierr); \ + } + +#define ALL_BESSEL(name, fcn) \ + SS_BESSEL (name, fcn) \ + SM_BESSEL (name, fcn) \ + MS_BESSEL (name, fcn) \ + MM_BESSEL (name, fcn) \ + SN_BESSEL (name, fcn) \ + NS_BESSEL (name, fcn) \ + NN_BESSEL (name, fcn) \ + 
RC_BESSEL (name, fcn) + +ALL_BESSEL (besselj, cbesj) +ALL_BESSEL (bessely, cbesy) +ALL_BESSEL (besseli, cbesi) +ALL_BESSEL (besselk, cbesk) +ALL_BESSEL (besselh1, cbesh1) +ALL_BESSEL (besselh2, cbesh2) + +#undef ALL_BESSEL +#undef SS_BESSEL +#undef SM_BESSEL +#undef MS_BESSEL +#undef MM_BESSEL +#undef SN_BESSEL +#undef NS_BESSEL +#undef NN_BESSEL +#undef RC_BESSEL + Complex airy (const Complex& z, bool deriv, bool scaled, octave_idx_type& ierr) { @@ -1061,6 +1962,136 @@ return retval; } +FloatComplex +airy (const FloatComplex& z, bool deriv, bool scaled, octave_idx_type& ierr) +{ + float ar = 0.0; + float ai = 0.0; + + octave_idx_type nz; + + float zr = z.real (); + float zi = z.imag (); + + octave_idx_type id = deriv ? 1 : 0; + + F77_FUNC (cairy, CAIRY) (zr, zi, id, 2, ar, ai, nz, ierr); + + if (! scaled) + { + FloatComplex expz = exp (- static_cast (2.0 / 3.0) * z * sqrt(z)); + + float rexpz = real (expz); + float iexpz = imag (expz); + + float tmp = ar*rexpz - ai*iexpz; + + ai = ar*iexpz + ai*rexpz; + ar = tmp; + } + + if (zi == 0.0 && (! scaled || zr >= 0.0)) + ai = 0.0; + + return bessel_return_value (FloatComplex (ar, ai), ierr); +} + +FloatComplex +biry (const FloatComplex& z, bool deriv, bool scaled, octave_idx_type& ierr) +{ + float ar = 0.0; + float ai = 0.0; + + float zr = z.real (); + float zi = z.imag (); + + octave_idx_type id = deriv ? 1 : 0; + + F77_FUNC (cbiry, CBIRY) (zr, zi, id, 2, ar, ai, ierr); + + if (! scaled) + { + FloatComplex expz = exp (std::abs (real (static_cast (2.0 / 3.0) * z * sqrt (z)))); + + float rexpz = real (expz); + float iexpz = imag (expz); + + float tmp = ar*rexpz - ai*iexpz; + + ai = ar*iexpz + ai*rexpz; + ar = tmp; + } + + if (zi == 0.0 && (! 
scaled || zr >= 0.0)) + ai = 0.0; + + return bessel_return_value (FloatComplex (ar, ai), ierr); +} + +FloatComplexMatrix +airy (const FloatComplexMatrix& z, bool deriv, bool scaled, Array2& ierr) +{ + octave_idx_type nr = z.rows (); + octave_idx_type nc = z.cols (); + + FloatComplexMatrix retval (nr, nc); + + ierr.resize (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + retval(i,j) = airy (z(i,j), deriv, scaled, ierr(i,j)); + + return retval; +} + +FloatComplexMatrix +biry (const FloatComplexMatrix& z, bool deriv, bool scaled, Array2& ierr) +{ + octave_idx_type nr = z.rows (); + octave_idx_type nc = z.cols (); + + FloatComplexMatrix retval (nr, nc); + + ierr.resize (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + retval(i,j) = biry (z(i,j), deriv, scaled, ierr(i,j)); + + return retval; +} + +FloatComplexNDArray +airy (const FloatComplexNDArray& z, bool deriv, bool scaled, ArrayN& ierr) +{ + dim_vector dv = z.dims (); + octave_idx_type nel = dv.numel (); + FloatComplexNDArray retval (dv); + + ierr.resize (dv); + + for (octave_idx_type i = 0; i < nel; i++) + retval (i) = airy (z(i), deriv, scaled, ierr(i)); + + return retval; +} + +FloatComplexNDArray +biry (const FloatComplexNDArray& z, bool deriv, bool scaled, ArrayN& ierr) +{ + dim_vector dv = z.dims (); + octave_idx_type nel = dv.numel (); + FloatComplexNDArray retval (dv); + + ierr.resize (dv); + + for (octave_idx_type i = 0; i < nel; i++) + retval (i) = biry (z(i), deriv, scaled, ierr(i)); + + return retval; +} + static void gripe_betainc_nonconformant (octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2, octave_idx_type r3, octave_idx_type c3) @@ -1152,11 +2183,11 @@ betainc (double x, double a, const NDArray& b) { dim_vector dv = b.dims (); - int nel = dv.numel (); + octave_idx_type nel = dv.numel (); NDArray retval (dv); - for (int i = 0; i < nel; i++) + for (octave_idx_type i = 0; 
i < nel; i++) retval (i) = betainc (x, a, b(i)); return retval; @@ -1166,11 +2197,11 @@ betainc (double x, const NDArray& a, double b) { dim_vector dv = a.dims (); - int nel = dv.numel (); + octave_idx_type nel = dv.numel (); NDArray retval (dv); - for (int i = 0; i < nel; i++) + for (octave_idx_type i = 0; i < nel; i++) retval (i) = betainc (x, a(i), b); return retval; @@ -1184,11 +2215,11 @@ if (dv == b.dims ()) { - int nel = dv.numel (); + octave_idx_type nel = dv.numel (); retval.resize (dv); - for (int i = 0; i < nel; i++) + for (octave_idx_type i = 0; i < nel; i++) retval (i) = betainc (x, a(i), b(i)); } else @@ -1295,11 +2326,11 @@ betainc (const NDArray& x, double a, double b) { dim_vector dv = x.dims (); - int nel = dv.numel (); + octave_idx_type nel = dv.numel (); NDArray retval (dv); - for (int i = 0; i < nel; i++) + for (octave_idx_type i = 0; i < nel; i++) retval (i) = betainc (x(i), a, b); return retval; @@ -1313,11 +2344,11 @@ if (dv == b.dims ()) { - int nel = dv.numel (); + octave_idx_type nel = dv.numel (); retval.resize (dv); - for (int i = 0; i < nel; i++) + for (octave_idx_type i = 0; i < nel; i++) retval (i) = betainc (x(i), a, b(i)); } else @@ -1334,11 +2365,11 @@ if (dv == a.dims ()) { - int nel = dv.numel (); + octave_idx_type nel = dv.numel (); retval.resize (dv); - for (int i = 0; i < nel; i++) + for (octave_idx_type i = 0; i < nel; i++) retval (i) = betainc (x(i), a(i), b); } else @@ -1355,11 +2386,294 @@ if (dv == a.dims () && dv == b.dims ()) { - int nel = dv.numel (); + octave_idx_type nel = dv.numel (); + + retval.resize (dv); + + for (octave_idx_type i = 0; i < nel; i++) + retval (i) = betainc (x(i), a(i), b(i)); + } + else + gripe_betainc_nonconformant (dv, a.dims (), b.dims ()); + + return retval; +} + +float +betainc (float x, float a, float b) +{ + float retval; + F77_XFCN (xbetai, XBETAI, (x, a, b, retval)); + return retval; +} + +FloatMatrix +betainc (float x, float a, const FloatMatrix& b) +{ + octave_idx_type nr = b.rows (); 
+ octave_idx_type nc = b.cols (); + + FloatMatrix retval (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + retval(i,j) = betainc (x, a, b(i,j)); + + return retval; +} + +FloatMatrix +betainc (float x, const FloatMatrix& a, float b) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + FloatMatrix retval (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + retval(i,j) = betainc (x, a(i,j), b); + + return retval; +} + +FloatMatrix +betainc (float x, const FloatMatrix& a, const FloatMatrix& b) +{ + FloatMatrix retval; + + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + octave_idx_type b_nr = b.rows (); + octave_idx_type b_nc = b.cols (); + + if (a_nr == b_nr && a_nc == b_nc) + { + retval.resize (a_nr, a_nc); + + for (octave_idx_type j = 0; j < a_nc; j++) + for (octave_idx_type i = 0; i < a_nr; i++) + retval(i,j) = betainc (x, a(i,j), b(i,j)); + } + else + gripe_betainc_nonconformant (1, 1, a_nr, a_nc, b_nr, b_nc); + + return retval; +} + +FloatNDArray +betainc (float x, float a, const FloatNDArray& b) +{ + dim_vector dv = b.dims (); + octave_idx_type nel = dv.numel (); + + FloatNDArray retval (dv); + + for (octave_idx_type i = 0; i < nel; i++) + retval (i) = betainc (x, a, b(i)); + + return retval; +} + +FloatNDArray +betainc (float x, const FloatNDArray& a, float b) +{ + dim_vector dv = a.dims (); + octave_idx_type nel = dv.numel (); + + FloatNDArray retval (dv); + + for (octave_idx_type i = 0; i < nel; i++) + retval (i) = betainc (x, a(i), b); + + return retval; +} + +FloatNDArray +betainc (float x, const FloatNDArray& a, const FloatNDArray& b) +{ + FloatNDArray retval; + dim_vector dv = a.dims (); + + if (dv == b.dims ()) + { + octave_idx_type nel = dv.numel (); retval.resize (dv); - for (int i = 0; i < nel; i++) + for (octave_idx_type i = 0; i < nel; i++) + retval (i) = betainc (x, a(i), b(i)); + } + else + 
gripe_betainc_nonconformant (dim_vector (0), dv, b.dims ()); + + return retval; +} + + +FloatMatrix +betainc (const FloatMatrix& x, float a, float b) +{ + octave_idx_type nr = x.rows (); + octave_idx_type nc = x.cols (); + + FloatMatrix retval (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + retval(i,j) = betainc (x(i,j), a, b); + + return retval; +} + +FloatMatrix +betainc (const FloatMatrix& x, float a, const FloatMatrix& b) +{ + FloatMatrix retval; + + octave_idx_type nr = x.rows (); + octave_idx_type nc = x.cols (); + + octave_idx_type b_nr = b.rows (); + octave_idx_type b_nc = b.cols (); + + if (nr == b_nr && nc == b_nc) + { + retval.resize (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + retval(i,j) = betainc (x(i,j), a, b(i,j)); + } + else + gripe_betainc_nonconformant (nr, nc, 1, 1, b_nr, b_nc); + + return retval; +} + +FloatMatrix +betainc (const FloatMatrix& x, const FloatMatrix& a, float b) +{ + FloatMatrix retval; + + octave_idx_type nr = x.rows (); + octave_idx_type nc = x.cols (); + + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (nr == a_nr && nc == a_nc) + { + retval.resize (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + retval(i,j) = betainc (x(i,j), a(i,j), b); + } + else + gripe_betainc_nonconformant (nr, nc, a_nr, a_nc, 1, 1); + + return retval; +} + +FloatMatrix +betainc (const FloatMatrix& x, const FloatMatrix& a, const FloatMatrix& b) +{ + FloatMatrix retval; + + octave_idx_type nr = x.rows (); + octave_idx_type nc = x.cols (); + + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + octave_idx_type b_nr = b.rows (); + octave_idx_type b_nc = b.cols (); + + if (nr == a_nr && nr == b_nr && nc == a_nc && nc == b_nc) + { + retval.resize (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + 
retval(i,j) = betainc (x(i,j), a(i,j), b(i,j)); + } + else + gripe_betainc_nonconformant (nr, nc, a_nr, a_nc, b_nr, b_nc); + + return retval; +} + +FloatNDArray +betainc (const FloatNDArray& x, float a, float b) +{ + dim_vector dv = x.dims (); + octave_idx_type nel = dv.numel (); + + FloatNDArray retval (dv); + + for (octave_idx_type i = 0; i < nel; i++) + retval (i) = betainc (x(i), a, b); + + return retval; +} + +FloatNDArray +betainc (const FloatNDArray& x, float a, const FloatNDArray& b) +{ + FloatNDArray retval; + dim_vector dv = x.dims (); + + if (dv == b.dims ()) + { + octave_idx_type nel = dv.numel (); + + retval.resize (dv); + + for (octave_idx_type i = 0; i < nel; i++) + retval (i) = betainc (x(i), a, b(i)); + } + else + gripe_betainc_nonconformant (dv, dim_vector (0), b.dims ()); + + return retval; +} + +FloatNDArray +betainc (const FloatNDArray& x, const FloatNDArray& a, float b) +{ + FloatNDArray retval; + dim_vector dv = x.dims (); + + if (dv == a.dims ()) + { + octave_idx_type nel = dv.numel (); + + retval.resize (dv); + + for (octave_idx_type i = 0; i < nel; i++) + retval (i) = betainc (x(i), a(i), b); + } + else + gripe_betainc_nonconformant (dv, a.dims (), dim_vector (0)); + + return retval; +} + +FloatNDArray +betainc (const FloatNDArray& x, const FloatNDArray& a, const FloatNDArray& b) +{ + FloatNDArray retval; + dim_vector dv = x.dims (); + + if (dv == a.dims () && dv == b.dims ()) + { + octave_idx_type nel = dv.numel (); + + retval.resize (dv); + + for (octave_idx_type i = 0; i < nel; i++) retval (i) = betainc (x(i), a(i), b(i)); } else @@ -1487,14 +2801,14 @@ gammainc (double x, const NDArray& a) { dim_vector dv = a.dims (); - int nel = dv.numel (); + octave_idx_type nel = dv.numel (); NDArray retval; NDArray result (dv); bool err; - for (int i = 0; i < nel; i++) + for (octave_idx_type i = 0; i < nel; i++) { result (i) = gammainc (x, a(i), err); @@ -1513,14 +2827,14 @@ gammainc (const NDArray& x, double a) { dim_vector dv = x.dims (); - int 
nel = dv.numel (); + octave_idx_type nel = dv.numel (); NDArray retval; NDArray result (dv); bool err; - for (int i = 0; i < nel; i++) + for (octave_idx_type i = 0; i < nel; i++) { result (i) = gammainc (x(i), a, err); @@ -1539,7 +2853,7 @@ gammainc (const NDArray& x, const NDArray& a) { dim_vector dv = x.dims (); - int nel = dv.numel (); + octave_idx_type nel = dv.numel (); NDArray retval; NDArray result; @@ -1550,7 +2864,212 @@ bool err; - for (int i = 0; i < nel; i++) + for (octave_idx_type i = 0; i < nel; i++) + { + result (i) = gammainc (x(i), a(i), err); + + if (err) + goto done; + } + + retval = result; + } + else + { + std::string x_str = dv.str (); + std::string a_str = a.dims ().str (); + + (*current_liboctave_error_handler) + ("gammainc: nonconformant arguments (arg 1 is %s, arg 2 is %s)", + x_str.c_str (), a_str. c_str ()); + } + + done: + + return retval; +} + +float +gammainc (float x, float a, bool& err) +{ + float retval; + + err = false; + + if (a < 0.0 || x < 0.0) + { + (*current_liboctave_error_handler) + ("gammainc: A and X must be non-negative"); + + err = true; + } + else + F77_XFCN (xsgammainc, XSGAMMAINC, (a, x, retval)); + + return retval; +} + +FloatMatrix +gammainc (float x, const FloatMatrix& a) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + FloatMatrix result (nr, nc); + FloatMatrix retval; + + bool err; + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + result(i,j) = gammainc (x, a(i,j), err); + + if (err) + goto done; + } + + retval = result; + + done: + + return retval; +} + +FloatMatrix +gammainc (const FloatMatrix& x, float a) +{ + octave_idx_type nr = x.rows (); + octave_idx_type nc = x.cols (); + + FloatMatrix result (nr, nc); + FloatMatrix retval; + + bool err; + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + result(i,j) = gammainc (x(i,j), a, err); + + if (err) + goto done; + } + + retval = result; + + done: + + 
return retval; +} + +FloatMatrix +gammainc (const FloatMatrix& x, const FloatMatrix& a) +{ + FloatMatrix result; + FloatMatrix retval; + + octave_idx_type nr = x.rows (); + octave_idx_type nc = x.cols (); + + octave_idx_type a_nr = a.rows (); + octave_idx_type a_nc = a.cols (); + + if (nr == a_nr && nc == a_nc) + { + result.resize (nr, nc); + + bool err; + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + result(i,j) = gammainc (x(i,j), a(i,j), err); + + if (err) + goto done; + } + + retval = result; + } + else + (*current_liboctave_error_handler) + ("gammainc: nonconformant arguments (arg 1 is %dx%d, arg 2 is %dx%d)", + nr, nc, a_nr, a_nc); + + done: + + return retval; +} + +FloatNDArray +gammainc (float x, const FloatNDArray& a) +{ + dim_vector dv = a.dims (); + octave_idx_type nel = dv.numel (); + + FloatNDArray retval; + FloatNDArray result (dv); + + bool err; + + for (octave_idx_type i = 0; i < nel; i++) + { + result (i) = gammainc (x, a(i), err); + + if (err) + goto done; + } + + retval = result; + + done: + + return retval; +} + +FloatNDArray +gammainc (const FloatNDArray& x, float a) +{ + dim_vector dv = x.dims (); + octave_idx_type nel = dv.numel (); + + FloatNDArray retval; + FloatNDArray result (dv); + + bool err; + + for (octave_idx_type i = 0; i < nel; i++) + { + result (i) = gammainc (x(i), a, err); + + if (err) + goto done; + } + + retval = result; + + done: + + return retval; +} + +FloatNDArray +gammainc (const FloatNDArray& x, const FloatNDArray& a) +{ + dim_vector dv = x.dims (); + octave_idx_type nel = dv.numel (); + + FloatNDArray retval; + FloatNDArray result; + + if (dv == a.dims ()) + { + result.resize (dv); + + bool err; + + for (octave_idx_type i = 0; i < nel; i++) { result (i) = gammainc (x(i), a(i), err); diff -r 45f5faba05a2 -r 82be108cc558 liboctave/lo-specfun.h --- a/liboctave/lo-specfun.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/lo-specfun.h Sun Apr 27 22:34:17 2008 +0200 @@ -35,6 +35,12 
@@ class ComplexNDArray; class RowVector; class ComplexColumnVector; +class FloatMatrix; +class FloatComplexMatrix; +class FloatNDArray; +class FloatComplexNDArray; +class FloatRowVector; +class FloatComplexColumnVector; class Range; #if !defined (HAVE_ACOSH) @@ -57,20 +63,54 @@ extern OCTAVE_API double erfc (double); #endif +#if !defined (HAVE_ACOSHF) +extern OCTAVE_API float acoshf (float); +#endif + +#if !defined (HAVE_ASINHF) +extern OCTAVE_API float asinhf (float); +#endif + +#if !defined (HAVE_ATANHF) +extern OCTAVE_API float atanhf (float); +#endif + +#if !defined (HAVE_ERFF) +extern OCTAVE_API float erf (float); +#endif + +#if !defined (HAVE_ERFCF) +extern OCTAVE_API float erfc (float); +#endif + #if !defined (HAVE_EXPM1) extern OCTAVE_API double expm1 (double x); #endif extern OCTAVE_API Complex expm1 (const Complex& x); +#if !defined (HAVE_EXPM1F) +extern OCTAVE_API float expm1f (float x); +#endif +extern OCTAVE_API FloatComplex expm1f (const FloatComplex& x); + #if !defined (HAVE_LOG1P) extern OCTAVE_API double log1p (double x); #endif extern OCTAVE_API Complex log1p (const Complex& x); +#if !defined (HAVE_LOG1PF) +extern OCTAVE_API float log1pf (float x); +#endif +extern OCTAVE_API FloatComplex log1pf (const FloatComplex& x); + extern OCTAVE_API double xgamma (double x); extern OCTAVE_API double xlgamma (double x); extern OCTAVE_API Complex xlgamma (const Complex& x); +extern OCTAVE_API float xgamma (float x); +extern OCTAVE_API float xlgamma (float x); +extern OCTAVE_API FloatComplex xlgamma (const FloatComplex& x); + extern OCTAVE_API Complex besselj (double alpha, const Complex& x, bool scaled, octave_idx_type& ierr); @@ -257,6 +297,192 @@ besselh2 (const RowVector& alpha, const ComplexColumnVector& x, bool scaled, Array2& ierr); +extern OCTAVE_API FloatComplex +besselj (float alpha, const FloatComplex& x, bool scaled, octave_idx_type& ierr); + +extern OCTAVE_API FloatComplex +bessely (float alpha, const FloatComplex& x, bool scaled, octave_idx_type& 
ierr); + +extern OCTAVE_API FloatComplex +besseli (float alpha, const FloatComplex& x, bool scaled, octave_idx_type& ierr); + +extern OCTAVE_API FloatComplex +besselk (float alpha, const FloatComplex& x, bool scaled, octave_idx_type& ierr); + +extern OCTAVE_API FloatComplex +besselh1 (float alpha, const FloatComplex& x, bool scaled, octave_idx_type& ierr); + +extern OCTAVE_API FloatComplex +besselh2 (float alpha, const FloatComplex& x, bool scaled, octave_idx_type& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselj (float alpha, const FloatComplexMatrix& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +bessely (float alpha, const FloatComplexMatrix& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besseli (float alpha, const FloatComplexMatrix& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselk (float alpha, const FloatComplexMatrix& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselh1 (float alpha, const FloatComplexMatrix& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselh2 (float alpha, const FloatComplexMatrix& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselj (const FloatMatrix& alpha, const FloatComplex& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +bessely (const FloatMatrix& alpha, const FloatComplex& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besseli (const FloatMatrix& alpha, const FloatComplex& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselk (const FloatMatrix& alpha, const FloatComplex& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselh1 (const FloatMatrix& alpha, const FloatComplex& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselh2 (const FloatMatrix& alpha, const FloatComplex& x, bool scaled, + Array2& ierr); + +extern 
OCTAVE_API FloatComplexMatrix +besselj (const FloatMatrix& alpha, const FloatComplexMatrix& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +bessely (const FloatMatrix& alpha, const FloatComplexMatrix& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besseli (const FloatMatrix& alpha, const FloatComplexMatrix& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselk (const FloatMatrix& alpha, const FloatComplexMatrix& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselh1 (const FloatMatrix& alpha, const FloatComplexMatrix& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselh2 (const FloatMatrix& alpha, const FloatComplexMatrix& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexNDArray +besselj (float alpha, const FloatComplexNDArray& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +bessely (float alpha, const FloatComplexNDArray& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +besseli (float alpha, const FloatComplexNDArray& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +besselk (float alpha, const FloatComplexNDArray& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +besselh1 (float alpha, const FloatComplexNDArray& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +besselh2 (float alpha, const FloatComplexNDArray& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +besselj (const FloatNDArray& alpha, const FloatComplex& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +bessely (const FloatNDArray& alpha, const FloatComplex& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +besseli (const FloatNDArray& alpha, const FloatComplex& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +besselk (const 
FloatNDArray& alpha, const FloatComplex& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +besselh1 (const FloatNDArray& alpha, const FloatComplex& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +besselh2 (const FloatNDArray& alpha, const FloatComplex& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +besselj (const FloatNDArray& alpha, const FloatComplexNDArray& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +bessely (const FloatNDArray& alpha, const FloatComplexNDArray& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +besseli (const FloatNDArray& alpha, const FloatComplexNDArray& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +besselk (const FloatNDArray& alpha, const FloatComplexNDArray& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +besselh1 (const FloatNDArray& alpha, const FloatComplexNDArray& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +besselh2 (const FloatNDArray& alpha, const FloatComplexNDArray& x, bool scaled, + ArrayN& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselj (const FloatRowVector& alpha, const FloatComplexColumnVector& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +bessely (const FloatRowVector& alpha, const FloatComplexColumnVector& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besseli (const FloatRowVector& alpha, const FloatComplexColumnVector& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselk (const FloatRowVector& alpha, const FloatComplexColumnVector& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselh1 (const FloatRowVector& alpha, const FloatComplexColumnVector& x, bool scaled, + Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +besselh2 (const FloatRowVector& alpha, const 
FloatComplexColumnVector& x, bool scaled, + Array2& ierr); + extern OCTAVE_API Complex airy (const Complex& z, bool deriv, bool scaled, octave_idx_type& ierr); extern OCTAVE_API Complex biry (const Complex& z, bool deriv, bool scaled, octave_idx_type& ierr); @@ -272,6 +498,21 @@ extern OCTAVE_API ComplexNDArray biry (const ComplexNDArray& z, bool deriv, bool scaled, ArrayN& ierr); +extern OCTAVE_API FloatComplex airy (const FloatComplex& z, bool deriv, bool scaled, octave_idx_type& ierr); +extern OCTAVE_API FloatComplex biry (const FloatComplex& z, bool deriv, bool scaled, octave_idx_type& ierr); + +extern OCTAVE_API FloatComplexMatrix +airy (const FloatComplexMatrix& z, bool deriv, bool scaled, Array2& ierr); + +extern OCTAVE_API FloatComplexMatrix +biry (const FloatComplexMatrix& z, bool deriv, bool scaled, Array2& ierr); + +extern OCTAVE_API FloatComplexNDArray +airy (const FloatComplexNDArray& z, bool deriv, bool scaled, ArrayN& ierr); + +extern OCTAVE_API FloatComplexNDArray +biry (const FloatComplexNDArray& z, bool deriv, bool scaled, ArrayN& ierr); + extern OCTAVE_API double betainc (double x, double a, double b); extern OCTAVE_API Matrix betainc (double x, double a, const Matrix& b); extern OCTAVE_API Matrix betainc (double x, const Matrix& a, double b); @@ -291,6 +532,25 @@ extern OCTAVE_API NDArray betainc (const NDArray& x, const NDArray& a, double b); extern OCTAVE_API NDArray betainc (const NDArray& x, const NDArray& a, const NDArray& b); +extern OCTAVE_API float betainc (float x, float a, float b); +extern OCTAVE_API FloatMatrix betainc (float x, float a, const FloatMatrix& b); +extern OCTAVE_API FloatMatrix betainc (float x, const FloatMatrix& a, float b); +extern OCTAVE_API FloatMatrix betainc (float x, const FloatMatrix& a, const FloatMatrix& b); + +extern OCTAVE_API FloatNDArray betainc (float x, float a, const FloatNDArray& b); +extern OCTAVE_API FloatNDArray betainc (float x, const FloatNDArray& a, float b); +extern OCTAVE_API FloatNDArray 
betainc (float x, const FloatNDArray& a, const FloatNDArray& b); + +extern OCTAVE_API FloatMatrix betainc (const FloatMatrix& x, float a, float b); +extern OCTAVE_API FloatMatrix betainc (const FloatMatrix& x, float a, const FloatMatrix& b); +extern OCTAVE_API FloatMatrix betainc (const FloatMatrix& x, const FloatMatrix& a, float b); +extern OCTAVE_API FloatMatrix betainc (const FloatMatrix& x, const FloatMatrix& a, const FloatMatrix& b); + +extern OCTAVE_API FloatNDArray betainc (const FloatNDArray& x, float a, float b); +extern OCTAVE_API FloatNDArray betainc (const FloatNDArray& x, float a, const FloatNDArray& b); +extern OCTAVE_API FloatNDArray betainc (const FloatNDArray& x, const FloatNDArray& a, float b); +extern OCTAVE_API FloatNDArray betainc (const FloatNDArray& x, const FloatNDArray& a, const FloatNDArray& b); + extern OCTAVE_API double gammainc (double x, double a, bool& err); extern OCTAVE_API Matrix gammainc (double x, const Matrix& a); extern OCTAVE_API Matrix gammainc (const Matrix& x, double a); @@ -306,6 +566,21 @@ return gammainc (x, a, err); } +extern OCTAVE_API float gammainc (float x, float a, bool& err); +extern OCTAVE_API FloatMatrix gammainc (float x, const FloatMatrix& a); +extern OCTAVE_API FloatMatrix gammainc (const FloatMatrix& x, float a); +extern OCTAVE_API FloatMatrix gammainc (const FloatMatrix& x, const FloatMatrix& a); + +extern OCTAVE_API FloatNDArray gammainc (float x, const FloatNDArray& a); +extern OCTAVE_API FloatNDArray gammainc (const FloatNDArray& x, float a); +extern OCTAVE_API FloatNDArray gammainc (const FloatNDArray& x, const FloatNDArray& a); + +inline float gammainc (float x, float a) +{ + bool err; + return gammainc (x, a, err); +} + #endif /* diff -r 45f5faba05a2 -r 82be108cc558 liboctave/lo-utils.cc --- a/liboctave/lo-utils.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/lo-utils.cc Sun Apr 27 22:34:17 2008 +0200 @@ -62,6 +62,17 @@ return static_cast ((x > 0) ? 
(x + 0.5) : (x - 0.5)); } +octave_idx_type +NINTbig (float x) +{ + if (x > std::numeric_limits::max ()) + return std::numeric_limits::max (); + else if (x < std::numeric_limits::min ()) + return std::numeric_limits::min (); + else + return static_cast ((x > 0) ? (x + 0.5) : (x - 0.5)); +} + int NINT (double x) { @@ -73,6 +84,17 @@ return static_cast ((x > 0) ? (x + 0.5) : (x - 0.5)); } +int +NINT (float x) +{ + if (x > std::numeric_limits::max ()) + return std::numeric_limits::max (); + else if (x < std::numeric_limits::min ()) + return std::numeric_limits::min (); + else + return static_cast ((x > 0) ? (x + 0.5) : (x - 0.5)); +} + double D_NINT (double x) { @@ -82,6 +104,15 @@ return floor (x + 0.5); } +float +F_NINT (float x) +{ + if (xisinf (x) || xisnan (x)) + return x; + else + return floor (x + 0.5); +} + // Save a string. char * @@ -379,6 +410,196 @@ os << ")"; } + + + + + + + + + + + + + + + + + + + + + +static inline float +read_float_inf_nan_na (std::istream& is, char c, char sign = '+') +{ + float d = 0.0; + + switch (c) + { + case 'i': case 'I': + { + c = is.get (); + if (c == 'n' || c == 'N') + { + c = is.get (); + if (c == 'f' || c == 'F') + d = sign == '-' ? 
-octave_Inf : octave_Inf; + else + is.putback (c); + } + else + is.putback (c); + } + break; + + case 'n': case 'N': + { + c = is.get (); + if (c == 'a' || c == 'A') + { + c = is.get (); + if (c == 'n' || c == 'N') + d = octave_NaN; + else + { + is.putback (c); + d = octave_NA; + } + } + else + is.putback (c); + } + break; + + default: + abort (); + } + + return d; +} + +float +octave_read_float (std::istream& is) +{ + float d = 0.0; + + char c1 = ' '; + + while (isspace (c1)) + c1 = is.get (); + + switch (c1) + { + case '-': + { + char c2 = 0; + c2 = is.get (); + if (c2 == 'i' || c2 == 'I') + d = read_float_inf_nan_na (is, c2, c1); + else + { + is.putback (c2); + is.putback (c1); + is >> d; + } + } + break; + + case '+': + { + char c2 = 0; + c2 = is.get (); + if (c2 == 'i' || c2 == 'I') + d = read_float_inf_nan_na (is, c2, c1); + else + { + is.putback (c2); + is.putback (c1); + is >> d; + } + } + break; + + case 'i': case 'I': + case 'n': case 'N': + d = read_float_inf_nan_na (is, c1); + break; + + default: + is.putback (c1); + is >> d; + } + + return d; +} + +FloatComplex +octave_read_float_complex (std::istream& is) +{ + float re = 0.0, im = 0.0; + + FloatComplex cx = 0.0; + + char ch = ' '; + + while (isspace (ch)) + ch = is.get (); + + if (ch == '(') + { + re = octave_read_float (is); + ch = is.get (); + + if (ch == ',') + { + im = octave_read_float (is); + ch = is.get (); + + if (ch == ')') + cx = FloatComplex (re, im); + else + is.setstate (std::ios::failbit); + } + else if (ch == ')') + cx = re; + else + is.setstate (std::ios::failbit); + } + else + { + is.putback (ch); + cx = octave_read_float (is); + } + + return cx; + +} + +void +octave_write_float (std::ostream& os, float d) +{ + if (lo_ieee_is_NA (d)) + os << "NA"; + else if (lo_ieee_isnan (d)) + os << "NaN"; + else if (lo_ieee_isinf (d)) + os << (d < 0 ? 
"-Inf" : "Inf"); + else + os << d; +} + +void +octave_write_float_complex (std::ostream& os, const FloatComplex& c) +{ + os << "("; + octave_write_float (os, real (c)); + os << ","; + octave_write_float (os, imag (c)); + os << ")"; +} + /* ;;; Local Variables: *** ;;; mode: C++ *** diff -r 45f5faba05a2 -r 82be108cc558 liboctave/lo-utils.h --- a/liboctave/lo-utils.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/lo-utils.h Sun Apr 27 22:34:17 2008 +0200 @@ -34,8 +34,11 @@ #include "syswait.h" extern OCTAVE_API octave_idx_type NINTbig (double x); +extern OCTAVE_API octave_idx_type NINTbig (float x); extern OCTAVE_API int NINT (double x); +extern OCTAVE_API int NINT (float x); extern OCTAVE_API double D_NINT (double x); +extern OCTAVE_API float F_NINT (float x); extern OCTAVE_API char *strsave (const char *); @@ -65,6 +68,12 @@ extern OCTAVE_API void octave_write_double (std::ostream& os, double dval); extern OCTAVE_API void octave_write_complex (std::ostream& os, const Complex& cval); +extern OCTAVE_API float octave_read_float (std::istream& is); +extern OCTAVE_API FloatComplex octave_read_float_complex (std::istream& is); + +extern OCTAVE_API void octave_write_float (std::ostream& os, float dval); +extern OCTAVE_API void octave_write_float_complex (std::ostream& os, const FloatComplex& cval); + #ifdef HAVE_LOADLIBRARY_API #include extern "C" OCTAVE_API void * octave_w32_library_search (HINSTANCE handle, const char *name); diff -r 45f5faba05a2 -r 82be108cc558 liboctave/mx-base.h --- a/liboctave/mx-base.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/mx-base.h Sun Apr 27 22:34:17 2008 +0200 @@ -34,21 +34,29 @@ #include "chMatrix.h" #include "dMatrix.h" #include "CMatrix.h" +#include "fMatrix.h" +#include "fCMatrix.h" // Column Vector classes. #include "dColVector.h" #include "CColVector.h" +#include "fColVector.h" +#include "fCColVector.h" // Row Vector classes. 
#include "dRowVector.h" #include "CRowVector.h" +#include "fRowVector.h" +#include "fCRowVector.h" // Diagonal Matrix classes. #include "dDiagMatrix.h" #include "CDiagMatrix.h" +#include "fDiagMatrix.h" +#include "fCDiagMatrix.h" // Sparse Matrix classes. @@ -62,6 +70,8 @@ #include "chNDArray.h" #include "dNDArray.h" #include "CNDArray.h" +#include "fNDArray.h" +#include "fCNDArray.h" #include "int8NDArray.h" #include "int16NDArray.h" diff -r 45f5faba05a2 -r 82be108cc558 liboctave/mx-defs.h --- a/liboctave/mx-defs.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/mx-defs.h Sun Apr 27 22:34:17 2008 +0200 @@ -28,50 +28,84 @@ class Matrix; class ComplexMatrix; +class FloatMatrix; +class FloatComplexMatrix; class boolMatrix; class charMatrix; class NDArray; class ComplexNDArray; +class FloatNDArray; +class FloatComplexNDArray; class boolNDArray; class charNDArray; class ColumnVector; class ComplexColumnVector; +class FloatColumnVector; +class FloatComplexColumnVector; class RowVector; class ComplexRowVector; +class FloatRowVector; +class FloatComplexRowVector; class DiagMatrix; class ComplexDiagMatrix; +class FloatDiagMatrix; +class FloatComplexDiagMatrix; class AEPBALANCE; class ComplexAEPBALANCE; +class FloatAEPBALANCE; +class FloatComplexAEPBALANCE; class GEPBALANCE; +class ComplexGEPBALANCE; +class FloatGEPBALANCE; +class FloatComplexGEPBALANCE; class CHOL; class ComplexCHOL; +class FloatCHOL; +class FloatComplexCHOL; class DET; class ComplexDET; +class FloatDET; +class FloatComplexDET; class EIG; class HESS; class ComplexHESS; +class FloatHESS; +class FloatComplexHESS; class SCHUR; class ComplexSCHUR; +class FloatSCHUR; +class FloatComplexSCHUR; class SVD; class ComplexSVD; +class FloatSVD; +class FloatComplexSVD; class LU; class ComplexLU; +class FloatLU; +class FloatComplexLU; class QR; class ComplexQR; +class FloatQR; +class FloatComplexQR; + +class QRP; +class ComplexQRP; +class FloatQRP; +class FloatComplexQRP; // Other data types we use but that don't always 
need to have full // declarations. @@ -88,6 +122,13 @@ typedef double (*d_c_Mapper)(const Complex&); typedef Complex (*c_c_Mapper)(const Complex&); +typedef bool (*b_f_Mapper)(float); +typedef bool (*b_fc_Mapper)(const FloatComplex&); + +typedef float (*f_f_Mapper)(float); +typedef float (*f_fc_Mapper)(const FloatComplex&); +typedef FloatComplex (*fc_fc_Mapper)(const FloatComplex&); + #endif #endif diff -r 45f5faba05a2 -r 82be108cc558 liboctave/mx-ext.h --- a/liboctave/mx-ext.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/mx-ext.h Sun Apr 27 22:34:17 2008 +0200 @@ -33,11 +33,15 @@ #include "dbleDET.h" #include "CmplxDET.h" +#include "floatDET.h" +#include "fCmplxDET.h" // Result of a Cholesky Factorization #include "dbleCHOL.h" #include "CmplxCHOL.h" +#include "floatCHOL.h" +#include "fCmplxCHOL.h" // Result of a Hessenberg Decomposition @@ -48,11 +52,15 @@ #include "dbleSCHUR.h" #include "CmplxSCHUR.h" +#include "floatSCHUR.h" +#include "fCmplxSCHUR.h" // Result of a Singular Value Decomposition. #include "dbleSVD.h" #include "CmplxSVD.h" +#include "floatSVD.h" +#include "fCmplxSVD.h" // Result of an Eigenvalue computation. @@ -62,6 +70,8 @@ #include "dbleLU.h" #include "CmplxLU.h" +#include "floatLU.h" +#include "fCmplxLU.h" // Result of a QR decomposition. 
diff -r 45f5faba05a2 -r 82be108cc558 liboctave/mx-inlines.cc --- a/liboctave/mx-inlines.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/mx-inlines.cc Sun Apr 27 22:34:17 2008 +0200 @@ -68,6 +68,11 @@ VS_OPS (Complex, Complex, double) VS_OPS (Complex, Complex, Complex) +VS_OPS (float, float, float) +VS_OPS (FloatComplex, float, FloatComplex) +VS_OPS (FloatComplex, FloatComplex, float) +VS_OPS (FloatComplex, FloatComplex, FloatComplex) + #define SV_OP_FCN(F, OP) \ template \ inline void \ @@ -106,6 +111,11 @@ SV_OPS (Complex, Complex, double) SV_OPS (Complex, Complex, Complex) +SV_OPS (float, float, float) +SV_OPS (FloatComplex, float, FloatComplex) +SV_OPS (FloatComplex, FloatComplex, float) +SV_OPS (FloatComplex, FloatComplex, FloatComplex) + #define VV_OP_FCN(F, OP) \ template \ inline void \ @@ -144,6 +154,11 @@ VV_OPS (Complex, Complex, double) VV_OPS (Complex, Complex, Complex) +VV_OPS (float, float, float) +VV_OPS (FloatComplex, float, FloatComplex) +VV_OPS (FloatComplex, FloatComplex, float) +VV_OPS (FloatComplex, FloatComplex, FloatComplex) + #define VS_OP2(F, OP, V, S) \ static inline V * \ F (V *v, size_t n, S s) \ @@ -164,6 +179,10 @@ VS_OP2S (Complex, double) VS_OP2S (Complex, Complex) +VS_OP2S (float, float) +VS_OP2S (FloatComplex, float) +VS_OP2S (FloatComplex, FloatComplex) + #define VV_OP2(F, OP, T1, T2) \ static inline T1 * \ F (T1 *v1, const T2 *v2, size_t n) \ @@ -184,6 +203,10 @@ VV_OP2S (Complex, double) VV_OP2S (Complex, Complex) +VV_OP2S (float, float) +VV_OP2S (FloatComplex, float) +VV_OP2S (FloatComplex, FloatComplex) + #define OP_EQ_FCN(T1, T2) \ static inline bool \ mx_inline_equal (const T1 *x, const T2 *y, size_t n) \ @@ -198,6 +221,8 @@ OP_EQ_FCN (char, char) OP_EQ_FCN (double, double) OP_EQ_FCN (Complex, Complex) +OP_EQ_FCN (float, float) +OP_EQ_FCN (FloatComplex, FloatComplex) #define OP_DUP_FCN(OP, F, R, T) \ static inline R * \ @@ -215,6 +240,8 @@ OP_DUP_FCN (, mx_inline_dup, double, double) OP_DUP_FCN (, mx_inline_dup, Complex, 
Complex) +OP_DUP_FCN (, mx_inline_dup, float, float) +OP_DUP_FCN (, mx_inline_dup, FloatComplex, FloatComplex) // These should really return a bool *. Also, they should probably be // in with a collection of other element-by-element boolean ops. @@ -230,6 +257,18 @@ OP_DUP_FCN (imag, mx_inline_imag_dup, double, Complex) OP_DUP_FCN (conj, mx_inline_conj_dup, Complex, Complex) +OP_DUP_FCN (0.0 ==, mx_inline_not, float, float) +OP_DUP_FCN (static_cast(0.0) ==, mx_inline_not, float, FloatComplex) + +OP_DUP_FCN (, mx_inline_make_complex, FloatComplex, float) + +OP_DUP_FCN (-, mx_inline_change_sign, float, float) +OP_DUP_FCN (-, mx_inline_change_sign, FloatComplex, FloatComplex) + +OP_DUP_FCN (real, mx_inline_real_dup, float, FloatComplex) +OP_DUP_FCN (imag, mx_inline_imag_dup, float, FloatComplex) +OP_DUP_FCN (conj, mx_inline_conj_dup, FloatComplex, FloatComplex) + // Avoid some code duplication. Maybe we should use templates. #define MX_CUMULATIVE_OP(RET_TYPE, ELT_TYPE, OP) \ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/mx-op-defs.h --- a/liboctave/mx-op-defs.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/mx-op-defs.h Sun Apr 27 22:34:17 2008 +0200 @@ -1003,16 +1003,8 @@ \ for (int j = 0; j < len; j++) \ { \ - if (dm.elem(j, j) == 1.0) \ - { \ - for (int i = 0; i < m_nr; i++) \ - r.elem(i, j) = m.elem(i, j); \ - } \ - else \ - { \ - for (int i = 0; i < m_nr; i++) \ - r.elem(i, j) = dm.elem(j, j) * m.elem(i, j); \ - } \ + for (int i = 0; i < m_nr; i++) \ + r.elem(i, j) = dm.elem(j, j) * m.elem(i, j); \ } \ } \ } \ @@ -1091,16 +1083,8 @@ \ for (int i = 0; i < len; i++) \ { \ - if (dm.elem(i, i) == 1.0) \ - { \ - for (int j = 0; j < m_nc; j++) \ - r.elem(i, j) = m.elem(i, j); \ - } \ - else \ - { \ - for (int j = 0; j < m_nc; j++) \ - r.elem(i, j) = dm.elem(i, i) * m.elem(i, j); \ - } \ + for (int j = 0; j < m_nc; j++) \ + r.elem(i, j) = dm.elem(i, i) * m.elem(i, j); \ } \ } \ } \ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/mx-ops --- a/liboctave/mx-ops Wed May 14 
18:09:56 2008 +0200 +++ b/liboctave/mx-ops Sun Apr 27 22:34:17 2008 +0200 @@ -26,6 +26,10 @@ # M: matrix # DM: diagonal matrix # ND: N-d array +# FS: scalar +# FM: matrix +# FDM: diagonal matrix +# FND: N-d array # # core-type is only used for the octave_int types, and is the template # parameter: octave_int8 is octave_int @@ -42,6 +46,14 @@ m Matrix M dMatrix.h YES 0.0 nda NDArray ND dNDArray.h YES 0.0 s double S NONE NO 0.0 +fcdm FloatComplexDiagMatrix DM fCDiagMatrix.h YES static_cast(0.0) +fcm FloatComplexMatrix M fCMatrix.h YES static_cast(0.0) +fcnda FloatComplexNDArray ND fCNDArray.h YES static_cast(0.0) +fcs FloatComplex S oct-cmplx.h NO static_cast(0.0) +fdm FloatDiagMatrix DM fDiagMatrix.h YES static_cast(0.0) +fm FloatMatrix M fMatrix.h YES static_cast(0.0) +fnda FloatNDArray ND fNDArray.h YES static_cast(0.0) +fs float S NONE NO static_cast(0.0) i8 octave_int8 S oct-inttypes.h YES octave_int8(0) int8_t ui8 octave_uint8 S oct-inttypes.h YES octave_uint8(0) uint8_t i16 octave_int16 S oct-inttypes.h YES octave_int16(0) int16_t @@ -98,6 +110,37 @@ m m dm B m s dm B # +fcdm fcdm fdm B +fcdm fdm fcdm B +fcm fcs fcdm B +fcm fcs fdm B +fcm fcs fm BCL real NONE boolMatrix.h +fcnda fcs fnda BCL real NONE boolMatrix.h boolNDArray.h +fcm fcdm fcs B +fcm fcdm fcm B +fcm fcdm fm B +fcm fcdm fs B +fcm fcm fcdm B +fcm fcm fdm B +fcm fcm fm BCL real NONE boolMatrix.h +fcnda fcnda fnda BCL real NONE boolMatrix.h boolNDArray.h +fcm fcm fs BCL real NONE boolMatrix.h +fcnda fcnda fs BCL real NONE boolMatrix.h boolNDArray.h +fcm fdm fcs B +fcm fdm fcm B +fcm fm fcs BCL NONE real boolMatrix.h +fcnda fnda fcs BCL NONE real boolMatrix.h boolNDArray.h +fcm fm fcdm B +fcm fm fcm BCL NONE real boolMatrix.h +fcnda fnda fcnda BCL NONE real boolMatrix.h boolNDArray.h +fcm fs fcdm B +fcm fs fcm BCL NONE real boolMatrix.h +fcnda fs fcnda BCL NONE real boolMatrix.h boolNDArray.h +fm fdm fm B +fm fdm fs B +fm fm fdm B +fm fs fdm B +# i8nda s i8nda BCL NONE NONE boolMatrix.h boolNDArray.h 
i8nda i8nda s BCL NONE NONE boolMatrix.h boolNDArray.h ui8nda s ui8nda BCL NONE NONE boolMatrix.h boolNDArray.h @@ -114,6 +157,22 @@ i64nda i64nda s CL NONE NONE boolMatrix.h boolNDArray.h ui64nda s ui64nda CL NONE NONE boolMatrix.h boolNDArray.h ui64nda ui64nda s CL NONE NONE boolMatrix.h boolNDArray.h +i8nda fs i8nda BCL NONE NONE boolMatrix.h boolNDArray.h +i8nda i8nda fs BCL NONE NONE boolMatrix.h boolNDArray.h +ui8nda fs ui8nda BCL NONE NONE boolMatrix.h boolNDArray.h +ui8nda ui8nda fs BCL NONE NONE boolMatrix.h boolNDArray.h +i16nda fs i16nda BCL NONE NONE boolMatrix.h boolNDArray.h +i16nda i16nda fs BCL NONE NONE boolMatrix.h boolNDArray.h +ui16nda fs ui16nda BCL NONE NONE boolMatrix.h boolNDArray.h +ui16nda ui16nda fs BCL NONE NONE boolMatrix.h boolNDArray.h +i32nda fs i32nda BCL NONE NONE boolMatrix.h boolNDArray.h +i32nda i32nda fs BCL NONE NONE boolMatrix.h boolNDArray.h +ui32nda fs ui32nda BCL NONE NONE boolMatrix.h boolNDArray.h +ui32nda ui32nda fs BCL NONE NONE boolMatrix.h boolNDArray.h +i64nda fs i64nda CL NONE NONE boolMatrix.h boolNDArray.h +i64nda i64nda fs CL NONE NONE boolMatrix.h boolNDArray.h +ui64nda fs ui64nda CL NONE NONE boolMatrix.h boolNDArray.h +ui64nda ui64nda fs CL NONE NONE boolMatrix.h boolNDArray.h # i8nda nda i8 BCL NONE NONE boolMatrix.h boolNDArray.h i8nda i8 nda BCL NONE NONE boolMatrix.h boolNDArray.h @@ -131,6 +190,22 @@ i64nda i64 nda CL NONE NONE boolMatrix.h boolNDArray.h ui64nda nda ui64 CL NONE NONE boolMatrix.h boolNDArray.h ui64nda ui64 nda CL NONE NONE boolMatrix.h boolNDArray.h +i8nda fnda i8 BCL NONE NONE boolMatrix.h boolNDArray.h +i8nda i8 fnda BCL NONE NONE boolMatrix.h boolNDArray.h +ui8nda fnda ui8 BCL NONE NONE boolMatrix.h boolNDArray.h +ui8nda ui8 fnda BCL NONE NONE boolMatrix.h boolNDArray.h +i16nda fnda i16 BCL NONE NONE boolMatrix.h boolNDArray.h +i16nda i16 fnda BCL NONE NONE boolMatrix.h boolNDArray.h +ui16nda fnda ui16 BCL NONE NONE boolMatrix.h boolNDArray.h +ui16nda ui16 fnda BCL NONE NONE 
boolMatrix.h boolNDArray.h +i32nda fnda i32 BCL NONE NONE boolMatrix.h boolNDArray.h +i32nda i32 fnda BCL NONE NONE boolMatrix.h boolNDArray.h +ui32nda fnda ui32 BCL NONE NONE boolMatrix.h boolNDArray.h +ui32nda ui32 fnda BCL NONE NONE boolMatrix.h boolNDArray.h +i64nda fnda i64 CL NONE NONE boolMatrix.h boolNDArray.h +i64nda i64 fnda CL NONE NONE boolMatrix.h boolNDArray.h +ui64nda fnda ui64 CL NONE NONE boolMatrix.h boolNDArray.h +ui64nda ui64 fnda CL NONE NONE boolMatrix.h boolNDArray.h # i8nda nda i8nda BCL NONE NONE boolMatrix.h boolNDArray.h i8nda i8nda nda BCL NONE NONE boolMatrix.h boolNDArray.h @@ -148,6 +223,22 @@ i64nda i64nda nda CL NONE NONE boolMatrix.h boolNDArray.h ui6nda nda ui64nda CL NONE NONE boolMatrix.h boolNDArray.h ui64nda ui64nda nda CL NONE NONE boolMatrix.h boolNDArray.h +i8nda fnda i8nda BCL NONE NONE boolMatrix.h boolNDArray.h +i8nda i8nda fnda BCL NONE NONE boolMatrix.h boolNDArray.h +ui8nda fnda ui8nda BCL NONE NONE boolMatrix.h boolNDArray.h +ui8nda ui8nda fnda BCL NONE NONE boolMatrix.h boolNDArray.h +i16nda fnda i16nda BCL NONE NONE boolMatrix.h boolNDArray.h +i16nda i16nda fnda BCL NONE NONE boolMatrix.h boolNDArray.h +ui16nda fnda ui16nda BCL NONE NONE boolMatrix.h boolNDArray.h +ui16nda ui16nda fnda BCL NONE NONE boolMatrix.h boolNDArray.h +i32nda fnda i32nda BCL NONE NONE boolMatrix.h boolNDArray.h +i32nda i32nda fnda BCL NONE NONE boolMatrix.h boolNDArray.h +ui32nda fnda ui32nda BCL NONE NONE boolMatrix.h boolNDArray.h +ui32nda ui32nda fnda BCL NONE NONE boolMatrix.h boolNDArray.h +i64nda fnda i64nda CL NONE NONE boolMatrix.h boolNDArray.h +i64nda i64nda fnda CL NONE NONE boolMatrix.h boolNDArray.h +ui6nda fnda ui64nda CL NONE NONE boolMatrix.h boolNDArray.h +ui64nda ui64nda fnda CL NONE NONE boolMatrix.h boolNDArray.h # x i8nda ui8 CL NONE NONE boolMatrix.h boolNDArray.h x i8nda i16 CL NONE NONE boolMatrix.h boolNDArray.h diff -r 45f5faba05a2 -r 82be108cc558 liboctave/oct-cmplx.h --- a/liboctave/oct-cmplx.h Wed May 14 
18:09:56 2008 +0200 +++ b/liboctave/oct-cmplx.h Sun Apr 27 22:34:17 2008 +0200 @@ -27,6 +27,7 @@ #include typedef std::complex Complex; +typedef std::complex FloatComplex; #endif diff -r 45f5faba05a2 -r 82be108cc558 liboctave/oct-fftw.cc --- a/liboctave/oct-fftw.cc Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/oct-fftw.cc Sun Apr 27 22:34:17 2008 +0200 @@ -340,10 +340,298 @@ return *cur_plan_p; } -octave_fftw_planner fftw_planner; + +octave_float_fftw_planner::octave_float_fftw_planner (void) +{ + meth = ESTIMATE; + + plan[0] = plan[1] = 0; + d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0; + simd_align[0] = simd_align[1] = false; + inplace[0] = inplace[1] = false; + n[0] = n[1] = dim_vector (); + + rplan = 0; + rd = rs = rr = rh = 0; + rsimd_align = false; + rn = dim_vector (); + + // If we have a system wide wisdom file, import it. + fftwf_import_system_wisdom (); +} + +octave_float_fftw_planner::FftwMethod +octave_float_fftw_planner::method (void) +{ + return meth; +} + +octave_float_fftw_planner::FftwMethod +octave_float_fftw_planner::method (FftwMethod _meth) +{ + FftwMethod ret = meth; + if (_meth == ESTIMATE || _meth == MEASURE || + _meth == PATIENT || _meth == EXHAUSTIVE || + _meth == HYBRID) + { + if (meth != _meth) + { + meth = _meth; + if (rplan) + fftwf_destroy_plan (rplan); + if (plan[0]) + fftwf_destroy_plan (plan[0]); + if (plan[1]) + fftwf_destroy_plan (plan[1]); + rplan = plan[0] = plan[1] = 0; + } + } + else + ret = UNKNOWN; + return ret; +} + +fftwf_plan +octave_float_fftw_planner::create_plan (int dir, const int rank, + const dim_vector dims, octave_idx_type howmany, + octave_idx_type stride, octave_idx_type dist, + const FloatComplex *in, FloatComplex *out) +{ + int which = (dir == FFTW_FORWARD) ? 
0 : 1; + fftwf_plan *cur_plan_p = &plan[which]; + bool create_new_plan = false; + bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); + bool ioinplace = (in == out); + + // Don't create a new plan if we have a non SIMD plan already but + // can do SIMD. This prevents endlessly recreating plans if we + // change the alignment. + + if (plan[which] == 0 || d[which] != dist || s[which] != stride + || r[which] != rank || h[which] != howmany + || ioinplace != inplace[which] + || ((ioalign != simd_align[which]) ? !ioalign : false)) + create_new_plan = true; + else + { + // We still might not have the same shape of array. + + for (int i = 0; i < rank; i++) + if (dims(i) != n[which](i)) + { + create_new_plan = true; + break; + } + } + + if (create_new_plan) + { + d[which] = dist; + s[which] = stride; + r[which] = rank; + h[which] = howmany; + simd_align[which] = ioalign; + inplace[which] = ioinplace; + n[which] = dims; + + // Note reversal of dimensions for column major storage in FFTW. 
+ octave_idx_type nn = 1; + OCTAVE_LOCAL_BUFFER (int, tmp, rank); + + for (int i = 0, j = rank-1; i < rank; i++, j--) + { + tmp[i] = dims(j); + nn *= dims(j); + } + + int plan_flags = 0; + bool plan_destroys_in = true; + + switch (meth) + { + case UNKNOWN: + case ESTIMATE: + plan_flags |= FFTW_ESTIMATE; + plan_destroys_in = false; + break; + case MEASURE: + plan_flags |= FFTW_MEASURE; + break; + case PATIENT: + plan_flags |= FFTW_PATIENT; + break; + case EXHAUSTIVE: + plan_flags |= FFTW_EXHAUSTIVE; + break; + case HYBRID: + if (nn < 8193) + plan_flags |= FFTW_MEASURE; + else + { + plan_flags |= FFTW_ESTIMATE; + plan_destroys_in = false; + } + break; + } + + if (ioalign) + plan_flags &= ~FFTW_UNALIGNED; + else + plan_flags |= FFTW_UNALIGNED; + + if (*cur_plan_p) + fftwf_destroy_plan (*cur_plan_p); + if (plan_destroys_in) + { + // Create matrix with the same size and 16-byte alignment as input + OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32); + itmp = reinterpret_cast + (((reinterpret_cast(itmp) + 15) & ~ 0xF) + + ((reinterpret_cast (in)) & 0xF)); + + *cur_plan_p = + fftwf_plan_many_dft (rank, tmp, howmany, + reinterpret_cast (itmp), + 0, stride, dist, reinterpret_cast (out), + 0, stride, dist, dir, plan_flags); + } + else + { + *cur_plan_p = + fftwf_plan_many_dft (rank, tmp, howmany, + reinterpret_cast (const_cast (in)), + 0, stride, dist, reinterpret_cast (out), + 0, stride, dist, dir, plan_flags); + } + + if (*cur_plan_p == 0) + (*current_liboctave_error_handler) ("Error creating fftw plan"); + } + + return *cur_plan_p; +} + +fftwf_plan +octave_float_fftw_planner::create_plan (const int rank, const dim_vector dims, + octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, + const float *in, FloatComplex *out) +{ + fftwf_plan *cur_plan_p = &rplan; + bool create_new_plan = false; + bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); + + // Don't create a new plan if we have a non SIMD plan already but + // can do SIMD. 
This prevents endlessly recreating plans if we + // change the alignment. + + if (rplan == 0 || rd != dist || rs != stride || rr != rank + || rh != howmany || ((ioalign != rsimd_align) ? !ioalign : false)) + create_new_plan = true; + else + { + // We still might not have the same shape of array. + + for (int i = 0; i < rank; i++) + if (dims(i) != rn(i)) + { + create_new_plan = true; + break; + } + } + + if (create_new_plan) + { + rd = dist; + rs = stride; + rr = rank; + rh = howmany; + rsimd_align = ioalign; + rn = dims; + + // Note reversal of dimensions for column major storage in FFTW. + octave_idx_type nn = 1; + OCTAVE_LOCAL_BUFFER (int, tmp, rank); + + for (int i = 0, j = rank-1; i < rank; i++, j--) + { + tmp[i] = dims(j); + nn *= dims(j); + } + + int plan_flags = 0; + bool plan_destroys_in = true; + + switch (meth) + { + case UNKNOWN: + case ESTIMATE: + plan_flags |= FFTW_ESTIMATE; + plan_destroys_in = false; + break; + case MEASURE: + plan_flags |= FFTW_MEASURE; + break; + case PATIENT: + plan_flags |= FFTW_PATIENT; + break; + case EXHAUSTIVE: + plan_flags |= FFTW_EXHAUSTIVE; + break; + case HYBRID: + if (nn < 8193) + plan_flags |= FFTW_MEASURE; + else + { + plan_flags |= FFTW_ESTIMATE; + plan_destroys_in = false; + } + break; + } + + if (ioalign) + plan_flags &= ~FFTW_UNALIGNED; + else + plan_flags |= FFTW_UNALIGNED; + + if (*cur_plan_p) + fftwf_destroy_plan (*cur_plan_p); + + if (plan_destroys_in) + { + // Create matrix with the same size and 16-byte alignment as input + OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32); + itmp = reinterpret_cast + (((reinterpret_cast(itmp) + 15) & ~ 0xF) + + ((reinterpret_cast (in)) & 0xF)); + + *cur_plan_p = + fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp, + 0, stride, dist, reinterpret_cast (out), + 0, stride, dist, plan_flags); + } + else + { + *cur_plan_p = + fftwf_plan_many_dft_r2c (rank, tmp, howmany, + (const_cast (in)), + 0, stride, dist, reinterpret_cast (out), + 0, stride, dist, plan_flags); + } + + if (*cur_plan_p 
== 0) + (*current_liboctave_error_handler) ("Error creating fftw plan"); + } + + return *cur_plan_p; +} + +octave_fftw_planner fftw_planner; +octave_float_fftw_planner float_fftw_planner; + +template static inline void -convert_packcomplex_1d (Complex *out, size_t nr, size_t nc, +convert_packcomplex_1d (T *out, size_t nr, size_t nc, octave_idx_type stride, octave_idx_type dist) { OCTAVE_QUIT; @@ -357,14 +645,15 @@ OCTAVE_QUIT; } +template static inline void -convert_packcomplex_Nd (Complex *out, const dim_vector &dv) +convert_packcomplex_Nd (T *out, const dim_vector &dv) { size_t nc = dv(0); size_t nr = dv(1); size_t np = (dv.length () > 2 ? dv.numel () / nc / nr : 1); size_t nrp = nr * np; - Complex *ptr1, *ptr2; + T *ptr1, *ptr2; OCTAVE_QUIT; @@ -409,7 +698,7 @@ for (size_t k = 0; k < jstart; k+= kstep) for (size_t l = nc/2+1; l < nc; l++) { - Complex tmp = out[i+ j + k + l]; + T tmp = out[i+ j + k + l]; out[i + j + k + l] = out[i + jj + k + l]; out[i + jj + k + l] = tmp; } @@ -427,10 +716,10 @@ dim_vector dv (npts); fftw_plan plan = fftw_planner.create_plan (1, dv, nsamples, stride, dist, - in, out); + in, out); fftw_execute_dft_r2c (plan, (const_cast(in)), - reinterpret_cast (out)); + reinterpret_cast (out)); // Need to create other half of the transform. @@ -545,6 +834,133 @@ return 0; } +int +octave_fftw::fft (const float *in, FloatComplex *out, size_t npts, + size_t nsamples, octave_idx_type stride, octave_idx_type dist) +{ + dist = (dist < 0 ? npts : dist); + + dim_vector dv (npts); + fftwf_plan plan = float_fftw_planner.create_plan (1, dv, nsamples, stride, dist, + in, out); + + fftwf_execute_dft_r2c (plan, (const_cast(in)), + reinterpret_cast (out)); + + // Need to create other half of the transform. + + convert_packcomplex_1d (out, nsamples, npts, stride, dist); + + return 0; +} + +int +octave_fftw::fft (const FloatComplex *in, FloatComplex *out, size_t npts, + size_t nsamples, octave_idx_type stride, octave_idx_type dist) +{ + dist = (dist < 0 ? 
npts : dist); + + dim_vector dv (npts); + fftwf_plan plan = float_fftw_planner.create_plan (FFTW_FORWARD, 1, dv, nsamples, + stride, dist, in, out); + + fftwf_execute_dft (plan, + reinterpret_cast (const_cast(in)), + reinterpret_cast (out)); + + return 0; +} + +int +octave_fftw::ifft (const FloatComplex *in, FloatComplex *out, size_t npts, + size_t nsamples, octave_idx_type stride, octave_idx_type dist) +{ + dist = (dist < 0 ? npts : dist); + + dim_vector dv (npts); + fftwf_plan plan = float_fftw_planner.create_plan (FFTW_BACKWARD, 1, dv, nsamples, + stride, dist, in, out); + + fftwf_execute_dft (plan, + reinterpret_cast (const_cast(in)), + reinterpret_cast (out)); + + const FloatComplex scale = npts; + for (size_t j = 0; j < nsamples; j++) + for (size_t i = 0; i < npts; i++) + out[i*stride + j*dist] /= scale; + + return 0; +} + +int +octave_fftw::fftNd (const float *in, FloatComplex *out, const int rank, + const dim_vector &dv) +{ + octave_idx_type dist = 1; + for (int i = 0; i < rank; i++) + dist *= dv(i); + + // Fool with the position of the start of the output matrix, so that + // creating other half of the matrix won't cause cache problems. + + octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); + + fftwf_plan plan = float_fftw_planner.create_plan (rank, dv, 1, 1, dist, + in, out + offset); + + fftwf_execute_dft_r2c (plan, (const_cast(in)), + reinterpret_cast (out+ offset)); + + // Need to create other half of the transform. 
+ + convert_packcomplex_Nd (out, dv); + + return 0; +} + +int +octave_fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank, + const dim_vector &dv) +{ + octave_idx_type dist = 1; + for (int i = 0; i < rank; i++) + dist *= dv(i); + + fftwf_plan plan = float_fftw_planner.create_plan (FFTW_FORWARD, rank, dv, 1, 1, + dist, in, out); + + fftwf_execute_dft (plan, + reinterpret_cast (const_cast(in)), + reinterpret_cast (out)); + + return 0; +} + +int +octave_fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank, + const dim_vector &dv) +{ + octave_idx_type dist = 1; + for (int i = 0; i < rank; i++) + dist *= dv(i); + + fftwf_plan plan = float_fftw_planner.create_plan (FFTW_BACKWARD, rank, dv, 1, 1, + dist, in, out); + + fftwf_execute_dft (plan, + reinterpret_cast (const_cast(in)), + reinterpret_cast (out)); + + const size_t npts = dv.numel (); + const FloatComplex scale = npts; + for (size_t i = 0; i < npts; i++) + out[i] /= scale; + + return 0; +} + + #endif /* diff -r 45f5faba05a2 -r 82be108cc558 liboctave/oct-fftw.h --- a/liboctave/oct-fftw.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/oct-fftw.h Sun Apr 27 22:34:17 2008 +0200 @@ -106,8 +106,86 @@ bool rsimd_align; }; +class +OCTAVE_API +octave_float_fftw_planner +{ +public: + + octave_float_fftw_planner (void); + + fftwf_plan create_plan (int dir, const int rank, const dim_vector dims, + octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, + const FloatComplex *in, FloatComplex *out); + + fftwf_plan create_plan (const int rank, const dim_vector dims, + octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, + const float *in, FloatComplex *out); + + enum FftwMethod { + UNKNOWN = -1, + ESTIMATE, + MEASURE, + PATIENT, + EXHAUSTIVE, + HYBRID + }; + + FftwMethod method (void); + + FftwMethod method (FftwMethod _meth); + +private: + + FftwMethod meth; + + // FIXME -- perhaps this should be split into two classes? 
+ + // Plan for fft and ifft of complex values + fftwf_plan plan[2]; + + // dist + octave_idx_type d[2]; + + // stride + octave_idx_type s[2]; + + // rank + int r[2]; + + // howmany + octave_idx_type h[2]; + + // dims + dim_vector n[2]; + + bool simd_align[2]; + bool inplace[2]; + + // Plan for fft of real values + fftwf_plan rplan; + + // dist + octave_idx_type rd; + + // stride + octave_idx_type rs; + + // rank + int rr; + + // howmany + octave_idx_type rh; + + // dims + dim_vector rn; + + bool rsimd_align; +}; + // FIXME -- maybe octave_fftw_planner should be a singleton object? extern OCTAVE_API octave_fftw_planner fftw_planner; +extern OCTAVE_API octave_float_fftw_planner float_fftw_planner; class OCTAVE_API @@ -127,6 +205,19 @@ static int ifftNd (const Complex*, Complex*, const int, const dim_vector &); + static int fft (const float *in, FloatComplex *out, size_t npts, + size_t nsamples = 1, octave_idx_type stride = 1, octave_idx_type dist = -1); + static int fft (const FloatComplex *in, FloatComplex *out, size_t npts, + size_t nsamples = 1, octave_idx_type stride = 1, octave_idx_type dist = -1); + static int ifft (const FloatComplex *in, FloatComplex *out, size_t npts, + size_t nsamples = 1, octave_idx_type stride = 1, octave_idx_type dist = -1); + + static int fftNd (const float*, FloatComplex*, const int, const dim_vector &); + static int fftNd (const FloatComplex*, FloatComplex*, const int, + const dim_vector &); + static int ifftNd (const FloatComplex*, FloatComplex*, const int, + const dim_vector &); + private: octave_fftw (); octave_fftw (const octave_fftw&); diff -r 45f5faba05a2 -r 82be108cc558 liboctave/oct-inttypes.h --- a/liboctave/oct-inttypes.h Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/oct-inttypes.h Sun Apr 27 22:34:17 2008 +0200 @@ -132,7 +132,7 @@ inline T octave_int_fit_to_range (const double& x, const T& mn, const T& mx) { - return (lo_ieee_isnan (x) ? 0 : (x > mx ? mx : (x < mn ? mn : static_cast (x)))); + return (__lo_ieee_isnan (x) ? 
0 : (x > mx ? mx : (x < mn ? mn : static_cast (x)))); } // If X is unsigned and the new type is signed, then we only have to @@ -450,7 +450,7 @@ { double tb = static_cast (b.value ()); double r = pow (a, tb); - r = lo_ieee_isnan (r) ? 0 : xround (r); + r = __lo_ieee_isnan (r) ? 0 : xround (r); return OCTAVE_INT_FIT_TO_RANGE (r, T); } @@ -460,7 +460,7 @@ { double ta = static_cast (a.value ()); double r = pow (ta, b); - r = lo_ieee_isnan (r) ? 0 : xround (r); + r = __lo_ieee_isnan (r) ? 0 : xround (r); return OCTAVE_INT_FIT_TO_RANGE (r, T); } @@ -524,7 +524,7 @@ { \ double tx = static_cast (x.value ()); \ double r = xround (tx OP y); \ - r = lo_ieee_isnan (r) ? 0 : xround (r); \ + r = __lo_ieee_isnan (r) ? 0 : xround (r); \ return OCTAVE_INT_FIT_TO_RANGE (r, T); \ } @@ -540,7 +540,7 @@ { \ double ty = static_cast (y.value ()); \ double r = x OP ty; \ - r = lo_ieee_isnan (r) ? 0 : xround (r); \ + r = __lo_ieee_isnan (r) ? 0 : xround (r); \ return OCTAVE_INT_FIT_TO_RANGE (r, T); \ } @@ -581,6 +581,70 @@ OCTAVE_DOUBLE_INT_CMP_OP (==) OCTAVE_DOUBLE_INT_CMP_OP (!=) +#define OCTAVE_INT_FLOAT_BIN_OP(OP) \ + template \ + octave_int \ + operator OP (const octave_int& x, float y) \ + { \ + double tx = static_cast (x.value ()); \ + double r = xround (tx OP y); \ + r = __lo_ieee_isnan (r) ? 0 : xround (r); \ + return OCTAVE_INT_FIT_TO_RANGE (r, T); \ + } + +OCTAVE_INT_FLOAT_BIN_OP(+) +OCTAVE_INT_FLOAT_BIN_OP(-) +OCTAVE_INT_FLOAT_BIN_OP(*) +OCTAVE_INT_FLOAT_BIN_OP(/) + +#define OCTAVE_FLOAT_INT_BIN_OP(OP) \ + template \ + octave_int \ + operator OP (float x, const octave_int& y) \ + { \ + double ty = static_cast (y.value ()); \ + double r = x OP ty; \ + r = __lo_ieee_isnan (r) ? 
0 : xround (r); \ + return OCTAVE_INT_FIT_TO_RANGE (r, T); \ + } + +OCTAVE_FLOAT_INT_BIN_OP(+) +OCTAVE_FLOAT_INT_BIN_OP(-) +OCTAVE_FLOAT_INT_BIN_OP(*) +OCTAVE_FLOAT_INT_BIN_OP(/) + +#define OCTAVE_INT_FLOAT_CMP_OP(OP) \ + template \ + bool \ + operator OP (const octave_int& x, const float& y) \ + { \ + double tx = static_cast (x.value ()); \ + return tx OP y; \ + } + +OCTAVE_INT_FLOAT_CMP_OP (<) +OCTAVE_INT_FLOAT_CMP_OP (<=) +OCTAVE_INT_FLOAT_CMP_OP (>=) +OCTAVE_INT_FLOAT_CMP_OP (>) +OCTAVE_INT_FLOAT_CMP_OP (==) +OCTAVE_INT_FLOAT_CMP_OP (!=) + +#define OCTAVE_FLOAT_INT_CMP_OP(OP) \ + template \ + bool \ + operator OP (const float& x, const octave_int& y) \ + { \ + double ty = static_cast (y.value ()); \ + return x OP ty; \ + } + +OCTAVE_FLOAT_INT_CMP_OP (<) +OCTAVE_FLOAT_INT_CMP_OP (<=) +OCTAVE_FLOAT_INT_CMP_OP (>=) +OCTAVE_FLOAT_INT_CMP_OP (>) +OCTAVE_FLOAT_INT_CMP_OP (==) +OCTAVE_FLOAT_INT_CMP_OP (!=) + #define OCTAVE_INT_BITCMP_OP(OP) \ template \ octave_int \ diff -r 45f5faba05a2 -r 82be108cc558 liboctave/vx-ops --- a/liboctave/vx-ops Wed May 14 18:09:56 2008 +0200 +++ b/liboctave/vx-ops Sun Apr 27 22:34:17 2008 +0200 @@ -23,6 +23,12 @@ cv ColumnVector V dColVector.h YES 0.0 rv RowVector V dRowVector.h YES 0.0 s double S NONE NO 0.0 +fccv FloatComplexColumnVector V fCColVector.h YES 0.0 +fcrv FloatComplexRowVector V fCRowVector.h YES 0.0 +fcs FloatComplex S oct-cmplx.h NO 0.0 +fcv FloatColumnVector V fColVector.h YES 0.0 +frv FloatRowVector V fRowVector.h YES 0.0 +fs float S NONE NO 0.0 # ops ccv ccv cv B ccv ccv s B @@ -36,3 +42,15 @@ crv rv cs B ccv s ccv B crv s crv B +fccv fccv fcv B +fccv fccv fs B +fcrv fcrv frv B +fcrv fcrv fs B +fccv fcs fcv B +fcrv fcs frv B +fccv fcv fccv B +fccv fcv fcs B +fcrv frv fcrv B +fcrv frv fcs B +fccv fs fccv B +fcrv fs fcrv B diff -r 45f5faba05a2 -r 82be108cc558 scripts/ChangeLog --- a/scripts/ChangeLog Wed May 14 18:09:56 2008 +0200 +++ b/scripts/ChangeLog Sun Apr 27 22:34:17 2008 +0200 @@ -1,3 +1,8 @@ +2008-05-20 David 
Bateman + + * miscellaneous/single.m: Remove. + * Makefile.in (SOURCES): Remove it here as well. + 2008-05-20 David Bateman * general/interp1q.m: New function. diff -r 45f5faba05a2 -r 82be108cc558 scripts/elfun/asec.m --- a/scripts/elfun/asec.m Wed May 14 18:09:56 2008 +0200 +++ b/scripts/elfun/asec.m Sun Apr 27 22:34:17 2008 +0200 @@ -44,4 +44,3 @@ %!error asec (); %!error asec (1, 2); - diff -r 45f5faba05a2 -r 82be108cc558 scripts/miscellaneous/Makefile.in --- a/scripts/miscellaneous/Makefile.in Wed May 14 18:09:56 2008 +0200 +++ b/scripts/miscellaneous/Makefile.in Sun Apr 27 22:34:17 2008 +0200 @@ -40,7 +40,7 @@ info.m inputname.m ismac.m ispc.m isunix.m license.m list_primes.m ls.m \ ls_command.m menu.m mex.m mexext.m mkoctfile.m movefile.m \ namelengthmax.m news.m orderfields.m pack.m paren.m parseparams.m perl.m\ - run.m semicolon.m setfield.m single.m substruct.m swapbytes.m symvar.m \ + run.m semicolon.m setfield.m substruct.m swapbytes.m symvar.m \ tar.m tempdir.m tempname.m texas_lotto.m unix.m unpack.m untar.m \ unzip.m ver.m version.m warning_ids.m what.m xor.m zip.m diff -r 45f5faba05a2 -r 82be108cc558 scripts/miscellaneous/single.m --- a/scripts/miscellaneous/single.m Wed May 14 18:09:56 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -## Copyright (C) 2005, 2006, 2007 John W. Eaton -## -## This file is part of Octave. -## -## Octave is free software; you can redistribute it and/or modify it -## under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 3 of the License, or (at -## your option) any later version. -## -## Octave is distributed in the hope that it will be useful, but -## WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -## General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with Octave; see the file COPYING. 
If not, see -## . - -## -*- texinfo -*- -## @deftypefn {Function File} {} single (@var{val}) -## Convert the numeric value @var{val} to single precision. -## -## @strong{Note}: this function currently returns its argument converted -## to double precision because Octave does not yet have a single-precision -## numeric data type. -## @end deftypefn - -function retval = single (val) - - if (nargin == 1 && isnumeric (val)) - retval = double(val); - else - print_usage (); - endif - -endfunction diff -r 45f5faba05a2 -r 82be108cc558 src/ChangeLog --- a/src/ChangeLog Wed May 14 18:09:56 2008 +0200 +++ b/src/ChangeLog Sun Apr 27 22:34:17 2008 +0200 @@ -1,3 +1,123 @@ +2008-05-20 David Bateman + + * data.cc (Flog2): Handle single precision. + * ov-float.h, ov.float.cc, ov-flt-complex.h, ov-flt-complex.cc, + ov-flt-re-mat.h, ov-flt-re-mat.cc, ov-flt-cx-mat.h, + ov-flt-cx-mat.cc: Provide single precision version of log2 mapper + function. + + * DLD-FUNCTIONS/__convn__.cc, DLD-FUNCTIONS/__pchip_deriv__.cc, + DLD-FUNCTIONS/besselj.cc, DLD-FUNCTIONS/betainc.cc, + DLD-FUNCTIONS/conv2.cc, DLD-FUNCTIONS/gammainc.cc, + DLD-FUNCTIONS/givens.cc, DLD-FUNCTIONS/kron.cc, + DLD-FUNCTIONS/lookup.cc, DLD-FUNCTIONS/syl.cc, data.cc: + Prefer demotion to single precision rather than promotion to double. + + * ov-float.cc, ov-float.h, ov-flt-complex.cc, ov-flt-complex.h, + ov-flt-cx-mat.cc, ov-flt-cx-mat.h, ov-flt-re-mat.cc, + ov-flt-re-mat.h (numeric_conversion_function (void) const): + Remove method. + + * ov-complex.cc, ov-complex.h, ov-cx-mat.cc, ov-cx-mat.h, + ov-re-mat.cc, ov-re-mat.h, ov-scalar.cc, ov-scalar.h + (numeric_conversion_function (void) const): Add method for + conversion to single precision. + + * DLD-FUNCTIONS/conv2.cc (Fconv2): Don't access third arg if we + don't have one. + + * DLD-FUNCTIONS/balance.cc, DLD-FUNCTIONS/expm.cc, + DLD-FUNCTIONS/find.cc, DLD-FUNCTIONS/hess.cc, + DLD-FUNCTIONS/qr.cc: COnvert for use with single precision. 
+ + * OPERATORS/op-int.h, OPERATORS/op-int-conv.cc, + OPERATORS/op-int-concat.cc: Adapt for single precision. + + * OPERATORS/op-i8-i8.cc, OPERATORS/op-i16-i16.cc, + OPERATORS/op-i32-i32.cc, OPERATORS/op-i64-i64.cc, + OPERATORS/op-ui8-ui8.cc, OPERATORS/op-ui16-ui16.cc, + OPERATORS/op-ui32-ui32.cc, OPERATORS/op-ui64-ui64.cc: + Add includes for single precision types. + + * OPERATORS/op-b-b.cc, OPERATORS/op-b-bm.cc, OPERATORS/op-bm-b.cc, + OPERATORS/op-fcm-fs.cc, OPERATORS/op-fcs-fs.cc, + OPERATORS/op-fm-fs.cc, OPERATORS/op-fs-fcm.cc, + OPERATORS/op-fs-fcs.cc, OPERATORS/op-fs-fm.cc, + OPERATORS/op-fs-fs.cc, OPERATORS/op-int.h, ov.cc, ov-scalar.cc, + ov-float.h, ov-flt-complex.cc, ov-float.cc, ov-flt-re-mat.cc, + ov-flt-cx-mat.cc: Replace octave_float with octave_scalar_float + + * OPERATORS/op-fm-fm.cc, OPERATORS/op-fm-fs.cc, + OPERATORS/op-fm-fcm.cc, OPERATORS/op-fm-fcs.cc, + OPERATORS/op-fs-fm.cc, OPERATORS/op-fs-fs.cc, + OPERATORS/op-fs-fcm.cc, OPERATORS/op-fs-fcs.cc, + OPERATORS/op-fcm-fm.cc, OPERATORS/op-fcm-fs.cc, + OPERATORS/op-fcm-fcm.cc, OPERATORS/op-fcm-fcs.cc, + OPERATORS/op-fcs-fm.cc, OPERATORS/op-fcs-fs.cc, + OPERATORS/op-fcs-fcm.cc, OPERATORS/op-fcs-fcs.cc, + OPERATORS/op-m-m.cc, OPERATORS/op-m-s.cc, + OPERATORS/op-m-cm.cc, OPERATORS/op-m-cs.cc, + OPERATORS/op-s-m.cc, OPERATORS/op-s-s.cc, + OPERATORS/op-s-cm.cc, OPERATORS/op-s-cs.cc, + OPERATORS/op-cm-m.cc, OPERATORS/op-cm-s.cc, + OPERATORS/op-cm-cm.cc, OPERATORS/op-cm-cs.cc, + OPERATORS/op-cs-m.cc, OPERATORS/op-cs-s.cc, + OPERATORS/op-cs-cm.cc, OPERATORS/op-cs-cs.cc: + Add mixed single/double assign operators. + + * ov.h (numeric_demotion_function): New method for double to + single demotion. + * ov-base.h (numeric_demotion_function): Declare virtual version. 
+ + * ov-complex.cc, ov-complex.h, ov-cx-mat.cc, ov-cx-mat.h, + ov-re-mat.cc, ov-re-mat.h, ov-scalar.cc, ov-scalar.h + (numeric_cdemote_function (void) const): Add method for + conversion to single precision renamed from the method + numeric_conversion_function. + + * ov.cc (do_binary_op): Use demotion function separately from the + numeric conversion function so as to avoid issues like + a=zeros(2,2);a(1,:)=1:2. + + * OPERATORS/op-fcm-fcm.cc, OPERATORS/op-fcm-fcs.cc, + OPERATORS/op-fcm-fm.cc, OPERATORS/op-fcm-fs.cc, + OPERATORS/op-fcs-fcm.cc, OPERATORS/op-fcs-fcs.cc, + OPERATORS/op-fcs-fm.cc, OPERATORS/op-fcs-fs.cc, + OPERATORS/op-fm-fcm.cc, OPERATORS/op-fm-fcs.cc, + OPERATORS/op-fm-fm.cc, OPERATORS/op-fm-fs.cc, + OPERATORS/op-fs-fcm.cc, OPERATORS/op-fs-fcs.cc, + OPERATORS/op-fs-fm.cc, OPERATORS/op-fs-fs.cc, ov-float.cc, + ov-float.h, ov-flt-complex.cc, ov-flt-complex.h, ov-flt-cx-mat.cc, + ov-flt-cx-mat.h, ov-flt-re-mat.cc, ov-flt-re-mat.h: New files. + * Makefile.in (OV_INCLUDES, OV_SRC, OP_XSRC, FLOAT_OP_XSRC, + DOUBLE_OP_XSRC): Add them here. 
+ + * DLD-FUNCTIONS/__convn__.cc, DLD-FUNCTIONS/__lin_interpn__.cc, + DLD-FUNCTIONS/__pchip_deriv__.cc, DLD-FUNCTIONS/besselj.cc, + DLD-FUNCTIONS/betainc.cc, DLD-FUNCTIONS/bsxfun.cc, + DLD-FUNCTIONS/chol.cc, DLD-FUNCTIONS/conv2.cc, + DLD-FUNCTIONS/det.cc, DLD-FUNCTIONS/eig.cc, DLD-FUNCTIONS/fft.cc, + DLD-FUNCTIONS/fft2.cc, DLD-FUNCTIONS/fftn.cc, + DLD-FUNCTIONS/fftw.cc, DLD-FUNCTIONS/filter.cc, + DLD-FUNCTIONS/gammainc.cc, DLD-FUNCTIONS/givens.cc, + DLD-FUNCTIONS/inv.cc, DLD-FUNCTIONS/kron.cc, + DLD-FUNCTIONS/lookup.cc, DLD-FUNCTIONS/lu.cc, + DLD-FUNCTIONS/matrix_type.cc, DLD-FUNCTIONS/max.cc, + DLD-FUNCTIONS/pinv.cc, DLD-FUNCTIONS/schur.cc, + DLD-FUNCTIONS/sqrtm.cc, DLD-FUNCTIONS/svd.cc, + DLD-FUNCTIONS/syl.cc, DLD-FUNCTIONS/symbfact.cc, + DLD-FUNCTIONS/typecast.cc, OPERATORS/op-b-b.cc, + OPERATORS/op-b-bm.cc, OPERATORS/op-bm-b.cc, OPERATORS/op-bm-bm.cc, + OPERATORS/op-cm-cm.cc, OPERATORS/op-cs-cs.cc, OPERATORS/op-m-m.cc, + OPERATORS/op-range.cc, OPERATORS/op-s-s.cc, bitfcns.cc, data.cc, + oct-stream.cc, ov-base.cc, ov-base.h, ov-bool-mat.cc, + ov-bool-mat.h, ov-bool.h, ov-ch-mat.cc, ov-ch-mat.h, + ov-complex.cc, ov-complex.h, ov-cx-mat.cc, ov-cx-mat.h, ov-intx.h, + ov-range.cc, ov-range.h, ov-re-mat.cc, ov-re-mat.h, ov-scalar.h, + ov.cc, ov.h, pr-output.cc, pr-output.h, pt-mat.cc, utils.cc, + utils.h, xdiv.cc, xdiv.h, xpow.cc, xpow.h: + Allow single precision types. + 2008-05-20 David Bateman * DLD-FUNCTIONS/rcond.cc: New function. 
diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/__convn__.cc --- a/src/DLD-FUNCTIONS/__convn__.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/__convn__.cc Sun Apr 27 22:34:17 2008 +0200 @@ -53,6 +53,11 @@ OCTAVE_CONVN_TRAIT (NDArray, ComplexNDArray, ComplexNDArray); OCTAVE_CONVN_TRAIT (ComplexNDArray, ComplexNDArray, ComplexNDArray); +OCTAVE_CONVN_TRAIT (FloatNDArray, FloatNDArray, FloatNDArray); +OCTAVE_CONVN_TRAIT (FloatComplexNDArray, FloatNDArray, FloatComplexNDArray); +OCTAVE_CONVN_TRAIT (FloatNDArray, FloatComplexNDArray, FloatComplexNDArray); +OCTAVE_CONVN_TRAIT (FloatComplexNDArray, FloatComplexNDArray, FloatComplexNDArray); + // FIXME -- this function should maybe be available in liboctave? template octave_value @@ -132,50 +137,100 @@ if (args.length () == 2) { - if (args(0).is_real_type ()) + if (args(0).is_single_type() || args(1).is_single_type()) { - if (args(1).is_real_type ()) + if (args(0).is_real_type ()) { - const NDArray a = args (0).array_value (); - const NDArray b = args (1).array_value (); + if (args(1).is_real_type ()) + { + const FloatNDArray a = args (0).float_array_value (); + const FloatNDArray b = args (1).float_array_value (); - if (! error_state) - retval = convn (a, b); - } - else if (args(1).is_complex_type ()) - { - const NDArray a = args (0).array_value (); - const ComplexNDArray b = args (1).complex_array_value (); + if (! error_state) + retval = convn (a, b); + } + else if (args(1).is_complex_type ()) + { + const FloatNDArray a = args (0).float_array_value (); + const FloatComplexNDArray b = args (1).float_complex_array_value (); - if (! error_state) - retval = convn (a, b); + if (! 
error_state) + retval = convn (a, b); + } + else + error ("__convn__: invalid call"); } - else - error ("__convn__: invalid call"); - } - else if (args(0).is_complex_type ()) - { - if (args(1).is_complex_type ()) + else if (args(0).is_complex_type ()) { - const ComplexNDArray a = args (0).complex_array_value (); - const ComplexNDArray b = args (1).complex_array_value (); + if (args(1).is_complex_type ()) + { + const FloatComplexNDArray a = args (0).float_complex_array_value (); + const FloatComplexNDArray b = args (1).float_complex_array_value (); - if (! error_state) - retval = convn (a, b); - } - else if (args(1).is_real_type ()) - { - const ComplexNDArray a = args (0).complex_array_value (); - const NDArray b = args (1).array_value (); + if (! error_state) + retval = convn (a, b); + } + else if (args(1).is_real_type ()) + { + const FloatComplexNDArray a = args (0).float_complex_array_value (); + const FloatNDArray b = args (1).float_array_value (); - if (! error_state) - retval = convn (a, b); + if (! error_state) + retval = convn (a, b); + } + else + error ("__convn__: invalid call"); } else error ("__convn__: invalid call"); } else - error ("__convn__: invalid call"); + { + if (args(0).is_real_type ()) + { + if (args(1).is_real_type ()) + { + const NDArray a = args (0).array_value (); + const NDArray b = args (1).array_value (); + + if (! error_state) + retval = convn (a, b); + } + else if (args(1).is_complex_type ()) + { + const NDArray a = args (0).array_value (); + const ComplexNDArray b = args (1).complex_array_value (); + + if (! error_state) + retval = convn (a, b); + } + else + error ("__convn__: invalid call"); + } + else if (args(0).is_complex_type ()) + { + if (args(1).is_complex_type ()) + { + const ComplexNDArray a = args (0).complex_array_value (); + const ComplexNDArray b = args (1).complex_array_value (); + + if (! 
error_state) + retval = convn (a, b); + } + else if (args(1).is_real_type ()) + { + const ComplexNDArray a = args (0).complex_array_value (); + const NDArray b = args (1).array_value (); + + if (! error_state) + retval = convn (a, b); + } + else + error ("__convn__: invalid call"); + } + else + error ("__convn__: invalid call"); + } } else print_usage (); diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/__lin_interpn__.cc --- a/src/DLD-FUNCTIONS/__lin_interpn__.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/__lin_interpn__.cc Sun Apr 27 22:34:17 2008 +0200 @@ -32,16 +32,18 @@ // equivalent to isvector.m +template bool -isvector (const NDArray& array) +isvector (const T& array) { const dim_vector dv = array.dims (); return dv.length () == 2 && (dv(0) == 1 || dv(1) == 1); } // lookup a value in a sorted table (lookup.m) +template octave_idx_type -lookup (const double *x, octave_idx_type n, double y) +lookup (const T *x, octave_idx_type n, T y) { octave_idx_type j; @@ -118,15 +120,16 @@ // n-dimensional linear interpolation +template void lin_interpn (int n, const octave_idx_type *size, const octave_idx_type *scale, - octave_idx_type Ni, double extrapval, const double **x, - const double *v, const double **y, double *vi) + octave_idx_type Ni, T extrapval, const T **x, + const T *v, const T **y, T *vi) { bool out = false; int bit; - OCTAVE_LOCAL_BUFFER (double, coef, 2*n); + OCTAVE_LOCAL_BUFFER (T, coef, 2*n); OCTAVE_LOCAL_BUFFER (octave_idx_type, index, n); // loop over all points @@ -158,7 +161,7 @@ // loop over all corners of hypercube (1< +octave_value +lin_interpn (int n, M *X, const M V, M *Y) +{ + octave_value retval; + + M Vi = M (Y[0].dims ()); + + OCTAVE_LOCAL_BUFFER (const T *, y, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, size, n); + + for (int i = 0; i < n; i++) + { + y[i] = Y[i].data (); + size[i] = V.dims()(i); + } + + OCTAVE_LOCAL_BUFFER (const T *, x, n); + OCTAVE_LOCAL_BUFFER (octave_idx_type, scale, n); + + const T *v = V.data (); + 
T *vi = Vi.fortran_vec (); + octave_idx_type Ni = Vi.numel (); + + T extrapval = octave_NA; + + // offset in memory of each dimension + + scale[0] = 1; + + for (int i = 1; i < n; i++) + scale[i] = scale[i-1] * size[i-1]; + + // tests if X[0] is a vector, if yes, assume that all elements of X are + // in the ndgrid format. + + if (! isvector (X[0])) + { + for (int i = 0; i < n; i++) + { + if (X[i].dims () != V.dims ()) + { + error ("interpn: incompatible size of argument number %d", i+1); + return retval; + } + else + { + M tmp = M (dim_vector (size[i], 1)); + + for (octave_idx_type j = 0; j < size[i]; j++) + tmp(j) = X[i](scale[i]*j); + + X[i] = tmp; + } + } + } + + for (int i = 0; i < n; i++) + { + if (! isvector (X[i]) && X[i].numel () != size[i]) + { + error ("interpn: incompatible size of argument number %d", i+1); + return retval; + } + else + x[i] = X[i].data (); + } + + lin_interpn (n, size, scale, Ni, extrapval, x, v, y, vi); + + retval = Vi; + + return retval; +} + // Perform @var{n}-dimensional interpolation. Each element of then // @var{n}-dimensional array @var{v} represents a value at a location // given by the parameters @var{x1}, @var{x2},...,@var{xn}. 
The parameters @@ -206,33 +284,12 @@ // dimension of the problem int n = (nargin-1)/2; - OCTAVE_LOCAL_BUFFER (NDArray, X, n); - OCTAVE_LOCAL_BUFFER (NDArray, Y, n); - - OCTAVE_LOCAL_BUFFER (const double *, x, n); - OCTAVE_LOCAL_BUFFER (const double *, y, n); - OCTAVE_LOCAL_BUFFER (octave_idx_type, scale, n); - OCTAVE_LOCAL_BUFFER (octave_idx_type, size, n); - - const NDArray V = args(n).array_value (); - NDArray Vi = NDArray (args(n+1).dims ()); - - if (error_state) + if (args(n).is_single_type()) { - print_usage (); - return retval; - } + OCTAVE_LOCAL_BUFFER (FloatNDArray, X, n); + OCTAVE_LOCAL_BUFFER (FloatNDArray, Y, n); - const double *v = V.data (); - double *vi = Vi.fortran_vec (); - octave_idx_type Ni = Vi.numel (); - - double extrapval = octave_NA; - - for (int i = 0; i < n; i++) - { - X[i] = args(i).array_value (); - Y[i] = args(n+i+1).array_value (); + const FloatNDArray V = args(n).float_array_value (); if (error_state) { @@ -240,61 +297,59 @@ return retval; } - y[i] = Y[i].data (); - size[i] = V.dims()(i); - - if (Y[0].dims () != Y[i].dims ()) - { - error ("interpn: incompatible size of argument number %d", n+i+2); - return retval; - } - } - - // offset in memory of each dimension - - scale[0] = 1; - - for (int i = 1; i < n; i++) - scale[i] = scale[i-1] * size[i-1]; - - // tests if X[0] is a vector, if yes, assume that all elements of X are - // in the ndgrid format. - - if (! 
isvector (X[0])) - { for (int i = 0; i < n; i++) { - if (X[i].dims () != V.dims ()) + X[i] = args(i).float_array_value (); + Y[i] = args(n+i+1).float_array_value (); + + if (error_state) { - error ("interpn: incompatible size of argument number %d", i+1); + print_usage (); return retval; } - else + + if (Y[0].dims () != Y[i].dims ()) { - NDArray tmp = NDArray (dim_vector (size[i], 1)); - - for (octave_idx_type j = 0; j < size[i]; j++) - tmp(j) = X[i](scale[i]*j); - - X[i] = tmp; + error ("interpn: incompatible size of argument number %d", n+i+2); + return retval; } } + + retval = lin_interpn (n, X, V, Y); } - - for (int i = 0; i < n; i++) + else { - if (! isvector (X[i]) && X[i].numel () != size[i]) + OCTAVE_LOCAL_BUFFER (NDArray, X, n); + OCTAVE_LOCAL_BUFFER (NDArray, Y, n); + + const NDArray V = args(n).array_value (); + + if (error_state) { - error ("interpn: incompatible size of argument number %d", i+1); + print_usage (); return retval; } - else - x[i] = X[i].data (); - } + + for (int i = 0; i < n; i++) + { + X[i] = args(i).array_value (); + Y[i] = args(n+i+1).array_value (); - lin_interpn (n, size, scale, Ni, extrapval, x, v, y, vi); + if (error_state) + { + print_usage (); + return retval; + } - retval = Vi; + if (Y[0].dims () != Y[i].dims ()) + { + error ("interpn: incompatible size of argument number %d", n+i+2); + return retval; + } + } + + retval = lin_interpn (n, X, V, Y); + } return retval; } diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/__pchip_deriv__.cc --- a/src/DLD-FUNCTIONS/__pchip_deriv__.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/__pchip_deriv__.cc Sun Apr 27 22:34:17 2008 +0200 @@ -37,6 +37,11 @@ F77_FUNC (dpchim, DPCHIM) (const octave_idx_type& n, double *x, double *f, double *d, const octave_idx_type &incfd, octave_idx_type *ierr); + + F77_RET_T + F77_FUNC (pchim, PCHIM) (const octave_idx_type& n, float *x, float *f, + float *d, const octave_idx_type &incfd, + octave_idx_type *ierr); } // Wrapper for SLATEC/PCHIP 
function DPCHIM to calculate the derivates @@ -53,44 +58,88 @@ if (nargin == 2) { - ColumnVector xvec (args(0).vector_value ()); - Matrix ymat (args(1).matrix_value ()); + if (args(0).is_single_type () || args(1).is_single_type ()) + { + FloatColumnVector xvec (args(0).float_vector_value ()); + FloatMatrix ymat (args(1).float_matrix_value ()); + + octave_idx_type nx = xvec.length (); + octave_idx_type nyr = ymat.rows (); + octave_idx_type nyc = ymat.columns (); - octave_idx_type nx = xvec.length (); - octave_idx_type nyr = ymat.rows (); - octave_idx_type nyc = ymat.columns (); + if (nx != nyr) + { + error ("number of rows for x and y must match"); + return retval; + } + + FloatColumnVector dvec (nx), yvec (nx); + FloatMatrix dmat (nyr, nyc); - if (nx != nyr) - { - error ("number of rows for x and y must match"); - return retval; - } + octave_idx_type ierr; + const octave_idx_type incfd = 1; + for (int c = 0; c < nyc; c++) + { + for (int r = 0; r < nx; r++) + yvec(r) = ymat(r,c); - ColumnVector dvec (nx), yvec (nx); - Matrix dmat (nyr, nyc); + F77_FUNC (pchim, PCHIM) (nx, xvec.fortran_vec (), + yvec.fortran_vec (), + dvec.fortran_vec (), incfd, &ierr); + + if (ierr < 0) + { + error ("PCHIM error: %i\n", ierr); + return retval; + } + + for (int r=0; r #include "CmplxAEPBAL.h" -#include "CmplxAEPBAL.h" +#include "fCmplxAEPBAL.h" #include "dbleAEPBAL.h" -#include "dbleAEPBAL.h" +#include "floatAEPBAL.h" +#include "CmplxGEPBAL.h" +#include "fCmplxGEPBAL.h" +#include "dbleGEPBAL.h" +#include "floatGEPBAL.h" #include "quit.h" #include "defun-dld.h" @@ -42,35 +46,6 @@ #include "oct-obj.h" #include "utils.h" -extern "C" -{ - F77_RET_T - F77_FUNC (dggbal, DGGBAL) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type& N, - double* A, const octave_idx_type& LDA, double* B, - const octave_idx_type& LDB, octave_idx_type& ILO, octave_idx_type& IHI, - double* LSCALE, double* RSCALE, - double* WORK, octave_idx_type& INFO - F77_CHAR_ARG_LEN_DECL); - - F77_RET_T - F77_FUNC (dggbak, DGGBAK) 
(F77_CONST_CHAR_ARG_DECL, - F77_CONST_CHAR_ARG_DECL, - const octave_idx_type& N, const octave_idx_type& ILO, - const octave_idx_type& IHI, const double* LSCALE, - const double* RSCALE, octave_idx_type& M, double* V, - const octave_idx_type& LDV, octave_idx_type& INFO - F77_CHAR_ARG_LEN_DECL - F77_CHAR_ARG_LEN_DECL); - - F77_RET_T - F77_FUNC (zggbal, ZGGBAL) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type& N, - Complex* A, const octave_idx_type& LDA, Complex* B, - const octave_idx_type& LDB, octave_idx_type& ILO, octave_idx_type& IHI, - double* LSCALE, double* RSCALE, - double* WORK, octave_idx_type& INFO - F77_CHAR_ARG_LEN_DECL); -} - DEFUN_DLD (balance, args, nargout, "-*- texinfo -*-\n\ @deftypefn {Loadable Function} {@var{aa} =} balance (@var{a}, @var{opt})\n\ @@ -145,14 +120,32 @@ return retval; } + bool isfloat = args(0).is_single_type () || + (! AEPcase && args(1).is_single_type()); + + bool complex_case = (args(0).is_complex_type () || + (! AEPcase && args(1).is_complex_type ())); + // Extract argument 1 parameter for both AEP and GEP. 
Matrix aa; ComplexMatrix caa; + FloatMatrix faa; + FloatComplexMatrix fcaa; - if (args(0).is_complex_type ()) - caa = args(0).complex_matrix_value (); + if (isfloat) + { + if (complex_case) + fcaa = args(0).float_complex_matrix_value (); + else + faa = args(0).float_matrix_value (); + } else - aa = args(0).matrix_value (); + { + if (complex_case) + caa = args(0).complex_matrix_value (); + else + aa = args(0).matrix_value (); + } if (error_state) return retval; @@ -173,33 +166,66 @@ } // balance the AEP - if (args(0).is_complex_type ()) + if (isfloat) { - ComplexAEPBALANCE result (caa, bal_job); + if (complex_case) + { + FloatComplexAEPBALANCE result (fcaa, bal_job); - if (nargout == 0 || nargout == 1) - retval(0) = result.balanced_matrix (); + if (nargout == 0 || nargout == 1) + retval(0) = result.balanced_matrix (); + else + { + retval(1) = result.balanced_matrix (); + retval(0) = result.balancing_matrix (); + } + } else { - retval(1) = result.balanced_matrix (); - retval(0) = result.balancing_matrix (); + FloatAEPBALANCE result (faa, bal_job); + + if (nargout == 0 || nargout == 1) + retval(0) = result.balanced_matrix (); + else + { + retval(1) = result.balanced_matrix (); + retval(0) = result.balancing_matrix (); + } } } else { - AEPBALANCE result (aa, bal_job); + if (complex_case) + { + ComplexAEPBALANCE result (caa, bal_job); - if (nargout == 0 || nargout == 1) - retval(0) = result.balanced_matrix (); + if (nargout == 0 || nargout == 1) + retval(0) = result.balanced_matrix (); + else + { + retval(1) = result.balanced_matrix (); + retval(0) = result.balancing_matrix (); + } + } else { - retval(1) = result.balanced_matrix (); - retval(0) = result.balancing_matrix (); + AEPBALANCE result (aa, bal_job); + + if (nargout == 0 || nargout == 1) + retval(0) = result.balanced_matrix (); + else + { + retval(1) = result.balanced_matrix (); + retval(0) = result.balancing_matrix (); + } } } } else { + if (nargout == 1) + warning ("balance: used GEP, should have two output 
arguments"); + // Generalized eigenvalue problem. if (nargin == 2) bal_job = "B"; @@ -219,126 +245,130 @@ Matrix bb; ComplexMatrix cbb; - - if (args(1).is_complex_type ()) - cbb = args(1).complex_matrix_value (); - else - bb = args(1).matrix_value (); - - if (error_state) - return retval; - - // Both matrices loaded, now let's check what kind of arithmetic: - // first, declare variables used in both the real and complex case - - octave_idx_type ilo, ihi, info; - RowVector lscale(nn), rscale(nn), work(6*nn); - char job = bal_job[0]; + FloatMatrix fbb; + FloatComplexMatrix fcbb; - static octave_idx_type complex_case - = (args(0).is_complex_type () || args(1).is_complex_type ()); - - // now balance - if (complex_case) + if (isfloat) { - if (args(0).is_real_type ()) - caa = ComplexMatrix (aa); - - if (args(1).is_real_type ()) - cbb = ComplexMatrix (bb); - - F77_XFCN (zggbal, ZGGBAL, - (F77_CONST_CHAR_ARG2 (&job, 1), - nn, caa.fortran_vec (), nn, cbb.fortran_vec (), - nn, ilo, ihi, lscale.fortran_vec (), - rscale.fortran_vec (), work.fortran_vec (), info - F77_CHAR_ARG_LEN (1))); + if (complex_case) + fcbb = args(1).float_complex_matrix_value (); + else + fbb = args(1).float_matrix_value (); } else { - // real matrices case - - F77_XFCN (dggbal, DGGBAL, - (F77_CONST_CHAR_ARG2 (&job, 1), - nn, aa.fortran_vec (), nn, bb.fortran_vec (), - nn, ilo, ihi, lscale.fortran_vec (), - rscale.fortran_vec (), work.fortran_vec (), info - F77_CHAR_ARG_LEN (1))); + if (complex_case) + cbb = args(1).complex_matrix_value (); + else + bb = args(1).matrix_value (); } - - // Since we just want the balancing matrices, we can use dggbal - // for both the real and complex cases. - - Matrix Pl(nn,nn), Pr(nn,nn); - - for (octave_idx_type ii = 0; ii < nn; ii++) - for (octave_idx_type jj = 0; jj < nn; jj++) - { - OCTAVE_QUIT; - Pl(ii,jj) = Pr(ii,jj) = (ii == jj ? 
1.0 : 0.0); - } - - // left first - F77_XFCN (dggbak, DGGBAK, - (F77_CONST_CHAR_ARG2 (&job, 1), - F77_CONST_CHAR_ARG2 ("L", 1), - nn, ilo, ihi, lscale.data (), rscale.data (), - nn, Pl.fortran_vec (), nn, info - F77_CHAR_ARG_LEN (1) - F77_CHAR_ARG_LEN (1))); - - // then right - F77_XFCN (dggbak, DGGBAK, - (F77_CONST_CHAR_ARG2 (&job, 1), - F77_CONST_CHAR_ARG2 ("R", 1), - nn, ilo, ihi, lscale.data (), rscale.data (), - nn, Pr.fortran_vec (), nn, info - F77_CHAR_ARG_LEN (1) - F77_CHAR_ARG_LEN (1))); - - switch (nargout) + // balance the GEP + if (isfloat) { - case 0: - case 1: - warning ("balance: used GEP, should have two output arguments"); - if (complex_case) - retval(0) = caa; - else - retval(0) = aa; - break; - - case 2: if (complex_case) { - retval(1) = cbb; - retval(0) = caa; + FloatComplexGEPBALANCE result (fcaa, fcbb, bal_job); + + switch (nargout) + { + case 4: + retval(3) = result.balanced_matrix2 (); + // fall through + case 3: + retval(2) = result.balanced_matrix (); + retval(1) = result.balancing_matrix2 (); + retval(0) = result.balancing_matrix (); + break; + case 2: + retval(1) = result.balancing_matrix2 (); + // fall through + case 1: + retval(0) = result.balancing_matrix (); + break; + default: + error ("balance: invalid number of output arguments"); + break; + } } else { - retval(1) = bb; - retval(0) = aa; + FloatGEPBALANCE result (faa, fbb, bal_job); + + switch (nargout) + { + case 4: + retval(3) = result.balanced_matrix2 (); + // fall through + case 3: + retval(2) = result.balanced_matrix (); + retval(1) = result.balancing_matrix2 (); + retval(0) = result.balancing_matrix (); + break; + case 2: + retval(1) = result.balancing_matrix2 (); + // fall through + case 1: + retval(0) = result.balancing_matrix (); + break; + default: + error ("balance: invalid number of output arguments"); + break; + } } - break; - - case 4: + } + else + { if (complex_case) { - retval(3) = cbb; - retval(2) = caa; + ComplexGEPBALANCE result (caa, cbb, bal_job); + + switch 
(nargout) + { + case 4: + retval(3) = result.balanced_matrix2 (); + // fall through + case 3: + retval(2) = result.balanced_matrix (); + retval(1) = result.balancing_matrix2 (); + retval(0) = result.balancing_matrix (); + break; + case 2: + retval(1) = result.balancing_matrix2 (); + // fall through + case 1: + retval(0) = result.balancing_matrix (); + break; + default: + error ("balance: invalid number of output arguments"); + break; + } } else { - retval(3) = bb; - retval(2) = aa; + GEPBALANCE result (aa, bb, bal_job); + + switch (nargout) + { + case 4: + retval(3) = result.balanced_matrix2 (); + // fall through + case 3: + retval(2) = result.balanced_matrix (); + retval(1) = result.balancing_matrix2 (); + retval(0) = result.balancing_matrix (); + break; + case 2: + retval(1) = result.balancing_matrix2 (); + // fall through + case 1: + retval(0) = result.balancing_matrix (); + break; + default: + error ("balance: invalid number of output arguments"); + break; + } } - retval(1) = Pr; - retval(0) = Pl.transpose (); // so that aa_bal = cc*aa*dd, etc. 
- break; - - default: - error ("balance: invalid number of output arguments"); - break; } } diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/besselj.cc --- a/src/DLD-FUNCTIONS/besselj.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/besselj.cc Sun Apr 27 22:34:17 2008 +0200 @@ -116,6 +116,43 @@ return retval; } +static inline FloatMatrix +int_array2_to_float_matrix (const Array2& a) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + FloatMatrix retval (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + + retval(i,j) = static_cast (a(i,j)); + } + + return retval; +} + +static inline FloatNDArray +int_arrayN_to_float_array (const ArrayN& a) +{ + dim_vector dv = a.dims (); + int nel = dv.numel (); + + FloatNDArray retval (dv); + + for (int i = 0; i < nel; i++) + { + OCTAVE_QUIT; + + retval(i) = static_cast (a(i)); + } + + return retval; +} + static void gripe_bessel_arg (const char *fn, const char *arg) { @@ -137,92 +174,146 @@ octave_value alpha_arg = args(0); octave_value x_arg = args(1); - if (alpha_arg.is_scalar_type ()) + if (alpha_arg.is_single_type () || x_arg.is_single_type ()) { - double alpha = args(0).double_value (); - - if (! error_state) + if (alpha_arg.is_scalar_type ()) { - if (x_arg.is_scalar_type ()) - { - Complex x = x_arg.complex_value (); - - if (! error_state) - { - octave_idx_type ierr; - octave_value result; - - DO_BESSEL (type, alpha, x, scaled, ierr, result); - - if (nargout > 1) - retval(1) = static_cast (ierr); - - retval(0) = result; - } - else - gripe_bessel_arg (fn, "second"); - } - else - { - ComplexNDArray x = x_arg.complex_array_value (); - - if (! 
error_state) - { - ArrayN ierr; - octave_value result; - - DO_BESSEL (type, alpha, x, scaled, ierr, result); - - if (nargout > 1) - retval(1) = int_arrayN_to_array (ierr); - - retval(0) = result; - } - else - gripe_bessel_arg (fn, "second"); - } - } - else - gripe_bessel_arg (fn, "first"); - } - else - { - dim_vector dv0 = args(0).dims (); - dim_vector dv1 = args(1).dims (); - - bool args0_is_row_vector = (dv0 (1) == dv0.numel ()); - bool args1_is_col_vector = (dv1 (0) == dv1.numel ()); - - if (args0_is_row_vector && args1_is_col_vector) - { - RowVector ralpha = args(0).row_vector_value (); + float alpha = args(0).float_value (); if (! error_state) { - ComplexColumnVector cx = - x_arg.complex_column_vector_value (); - - if (! error_state) + if (x_arg.is_scalar_type ()) { - Array2 ierr; - octave_value result; + FloatComplex x = x_arg.float_complex_value (); + + if (! error_state) + { + octave_idx_type ierr; + octave_value result; - DO_BESSEL (type, ralpha, cx, scaled, ierr, result); - - if (nargout > 1) - retval(1) = int_array2_to_matrix (ierr); + DO_BESSEL (type, alpha, x, scaled, ierr, result); + + if (nargout > 1) + retval(1) = static_cast (ierr); - retval(0) = result; + retval(0) = result; + } + else + gripe_bessel_arg (fn, "second"); } else - gripe_bessel_arg (fn, "second"); + { + FloatComplexNDArray x = x_arg.float_complex_array_value (); + + if (! 
error_state) + { + ArrayN ierr; + octave_value result; + + DO_BESSEL (type, alpha, x, scaled, ierr, result); + + if (nargout > 1) + retval(1) = int_arrayN_to_float_array (ierr); + + retval(0) = result; + } + else + gripe_bessel_arg (fn, "second"); + } } else gripe_bessel_arg (fn, "first"); } else { - NDArray alpha = args(0).array_value (); + dim_vector dv0 = args(0).dims (); + dim_vector dv1 = args(1).dims (); + + bool args0_is_row_vector = (dv0 (1) == dv0.numel ()); + bool args1_is_col_vector = (dv1 (0) == dv1.numel ()); + + if (args0_is_row_vector && args1_is_col_vector) + { + FloatRowVector ralpha = args(0).float_row_vector_value (); + + if (! error_state) + { + FloatComplexColumnVector cx = + x_arg.float_complex_column_vector_value (); + + if (! error_state) + { + Array2 ierr; + octave_value result; + + DO_BESSEL (type, ralpha, cx, scaled, ierr, result); + + if (nargout > 1) + retval(1) = int_array2_to_float_matrix (ierr); + + retval(0) = result; + } + else + gripe_bessel_arg (fn, "second"); + } + else + gripe_bessel_arg (fn, "first"); + } + else + { + FloatNDArray alpha = args(0).float_array_value (); + + if (! error_state) + { + if (x_arg.is_scalar_type ()) + { + FloatComplex x = x_arg.float_complex_value (); + + if (! error_state) + { + ArrayN ierr; + octave_value result; + + DO_BESSEL (type, alpha, x, scaled, ierr, result); + + if (nargout > 1) + retval(1) = int_arrayN_to_float_array (ierr); + + retval(0) = result; + } + else + gripe_bessel_arg (fn, "second"); + } + else + { + FloatComplexNDArray x = x_arg.float_complex_array_value (); + + if (! error_state) + { + ArrayN ierr; + octave_value result; + + DO_BESSEL (type, alpha, x, scaled, ierr, result); + + if (nargout > 1) + retval(1) = int_arrayN_to_float_array (ierr); + + retval(0) = result; + } + else + gripe_bessel_arg (fn, "second"); + } + } + else + gripe_bessel_arg (fn, "first"); + } + } + } + else + { + if (alpha_arg.is_scalar_type ()) + { + double alpha = args(0).double_value (); if (! 
error_state) { @@ -232,6 +323,25 @@ if (! error_state) { + octave_idx_type ierr; + octave_value result; + + DO_BESSEL (type, alpha, x, scaled, ierr, result); + + if (nargout > 1) + retval(1) = static_cast (ierr); + + retval(0) = result; + } + else + gripe_bessel_arg (fn, "second"); + } + else + { + ComplexNDArray x = x_arg.complex_array_value (); + + if (! error_state) + { ArrayN ierr; octave_value result; @@ -245,28 +355,93 @@ else gripe_bessel_arg (fn, "second"); } - else + } + else + gripe_bessel_arg (fn, "first"); + } + else + { + dim_vector dv0 = args(0).dims (); + dim_vector dv1 = args(1).dims (); + + bool args0_is_row_vector = (dv0 (1) == dv0.numel ()); + bool args1_is_col_vector = (dv1 (0) == dv1.numel ()); + + if (args0_is_row_vector && args1_is_col_vector) + { + RowVector ralpha = args(0).row_vector_value (); + + if (! error_state) { - ComplexNDArray x = x_arg.complex_array_value (); + ComplexColumnVector cx = + x_arg.complex_column_vector_value (); if (! error_state) { - ArrayN ierr; + Array2 ierr; octave_value result; - - DO_BESSEL (type, alpha, x, scaled, ierr, result); - + + DO_BESSEL (type, ralpha, cx, scaled, ierr, result); + if (nargout > 1) - retval(1) = int_arrayN_to_array (ierr); - + retval(1) = int_array2_to_matrix (ierr); + retval(0) = result; } else gripe_bessel_arg (fn, "second"); } + else + gripe_bessel_arg (fn, "first"); } else - gripe_bessel_arg (fn, "first"); + { + NDArray alpha = args(0).array_value (); + + if (! error_state) + { + if (x_arg.is_scalar_type ()) + { + Complex x = x_arg.complex_value (); + + if (! error_state) + { + ArrayN ierr; + octave_value result; + + DO_BESSEL (type, alpha, x, scaled, ierr, result); + + if (nargout > 1) + retval(1) = int_arrayN_to_array (ierr); + + retval(0) = result; + } + else + gripe_bessel_arg (fn, "second"); + } + else + { + ComplexNDArray x = x_arg.complex_array_value (); + + if (! 
error_state) + { + ArrayN ierr; + octave_value result; + + DO_BESSEL (type, alpha, x, scaled, ierr, result); + + if (nargout > 1) + retval(1) = int_arrayN_to_array (ierr); + + retval(0) = result; + } + else + gripe_bessel_arg (fn, "second"); + } + } + else + gripe_bessel_arg (fn, "first"); + } } } } @@ -459,8 +634,6 @@ int kind = 0; - ComplexNDArray z; - if (nargin > 1) { kind = args(0).int_value (); @@ -476,25 +649,52 @@ if (! error_state) { - z = args(nargin == 1 ? 0 : 1).complex_array_value (); + int idx = nargin == 1 ? 0 : 1; - if (! error_state) + if (args (idx).is_single_type ()) { - ArrayN ierr; - octave_value result; + FloatComplexNDArray z = args(idx).float_complex_array_value (); + + if (! error_state) + { + ArrayN ierr; + octave_value result; - if (kind > 1) - result = biry (z, kind == 3, scale, ierr); - else - result = airy (z, kind == 1, scale, ierr); + if (kind > 1) + result = biry (z, kind == 3, scale, ierr); + else + result = airy (z, kind == 1, scale, ierr); - if (nargout > 1) - retval(1) = int_arrayN_to_array (ierr); + if (nargout > 1) + retval(1) = int_arrayN_to_float_array (ierr); - retval(0) = result; + retval(0) = result; + } + else + error ("airy: expecting complex matrix for Z"); } else - error ("airy: expecting complex matrix for Z"); + { + ComplexNDArray z = args(idx).complex_array_value (); + + if (! 
error_state) + { + ArrayN ierr; + octave_value result; + + if (kind > 1) + result = biry (z, kind == 3, scale, ierr); + else + result = airy (z, kind == 1, scale, ierr); + + if (nargout > 1) + retval(1) = int_arrayN_to_array (ierr); + + retval(0) = result; + } + else + error ("airy: expecting complex matrix for Z"); + } } } else diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/betainc.cc --- a/src/DLD-FUNCTIONS/betainc.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/betainc.cc Sun Apr 27 22:34:17 2008 +0200 @@ -69,100 +69,206 @@ octave_value a_arg = args(1); octave_value b_arg = args(2); - if (x_arg.is_scalar_type ()) + // FIXME Can we make a template version of the duplicated code below + if (x_arg.is_single_type () || a_arg.is_single_type () || + b_arg.is_single_type ()) { - double x = x_arg.double_value (); - - if (a_arg.is_scalar_type ()) + if (x_arg.is_scalar_type ()) { - double a = a_arg.double_value (); + float x = x_arg.float_value (); - if (! error_state) + if (a_arg.is_scalar_type ()) { - if (b_arg.is_scalar_type ()) + float a = a_arg.float_value (); + + if (! error_state) { - double b = b_arg.double_value (); + if (b_arg.is_scalar_type ()) + { + float b = b_arg.float_value (); - if (! error_state) - retval = betainc (x, a, b); + if (! error_state) + retval = betainc (x, a, b); + } + else + { + FloatNDArray b = b_arg.float_array_value (); + + if (! error_state) + retval = betainc (x, a, b); + } } - else + } + else + { + FloatNDArray a = a_arg.float_array_value (); + + if (! error_state) { - NDArray b = b_arg.array_value (); + if (b_arg.is_scalar_type ()) + { + float b = b_arg.float_value (); - if (! error_state) - retval = betainc (x, a, b); + if (! error_state) + retval = betainc (x, a, b); + } + else + { + FloatNDArray b = b_arg.float_array_value (); + + if (! error_state) + retval = betainc (x, a, b); + } } } } else { - NDArray a = a_arg.array_value (); + FloatNDArray x = x_arg.float_array_value (); - if (! 
error_state) + if (a_arg.is_scalar_type ()) { - if (b_arg.is_scalar_type ()) + float a = a_arg.float_value (); + + if (! error_state) { - double b = b_arg.double_value (); + if (b_arg.is_scalar_type ()) + { + float b = b_arg.float_value (); + + if (! error_state) + retval = betainc (x, a, b); + } + else + { + FloatNDArray b = b_arg.float_array_value (); - if (! error_state) - retval = betainc (x, a, b); + if (! error_state) + retval = betainc (x, a, b); + } } - else + } + else + { + FloatNDArray a = a_arg.float_array_value (); + + if (! error_state) { - NDArray b = b_arg.array_value (); + if (b_arg.is_scalar_type ()) + { + float b = b_arg.float_value (); - if (! error_state) - retval = betainc (x, a, b); + if (! error_state) + retval = betainc (x, a, b); + } + else + { + FloatNDArray b = b_arg.float_array_value (); + + if (! error_state) + retval = betainc (x, a, b); + } } } } } else { - NDArray x = x_arg.array_value (); - - if (a_arg.is_scalar_type ()) + if (x_arg.is_scalar_type ()) { - double a = a_arg.double_value (); + double x = x_arg.double_value (); - if (! error_state) + if (a_arg.is_scalar_type ()) { - if (b_arg.is_scalar_type ()) + double a = a_arg.double_value (); + + if (! error_state) { - double b = b_arg.double_value (); + if (b_arg.is_scalar_type ()) + { + double b = b_arg.double_value (); - if (! error_state) - retval = betainc (x, a, b); + if (! error_state) + retval = betainc (x, a, b); + } + else + { + NDArray b = b_arg.array_value (); + + if (! error_state) + retval = betainc (x, a, b); + } } - else + } + else + { + NDArray a = a_arg.array_value (); + + if (! error_state) { - NDArray b = b_arg.array_value (); + if (b_arg.is_scalar_type ()) + { + double b = b_arg.double_value (); - if (! error_state) - retval = betainc (x, a, b); + if (! error_state) + retval = betainc (x, a, b); + } + else + { + NDArray b = b_arg.array_value (); + + if (! 
error_state) + retval = betainc (x, a, b); + } } } } else { - NDArray a = a_arg.array_value (); + NDArray x = x_arg.array_value (); - if (! error_state) + if (a_arg.is_scalar_type ()) { - if (b_arg.is_scalar_type ()) + double a = a_arg.double_value (); + + if (! error_state) { - double b = b_arg.double_value (); + if (b_arg.is_scalar_type ()) + { + double b = b_arg.double_value (); + + if (! error_state) + retval = betainc (x, a, b); + } + else + { + NDArray b = b_arg.array_value (); - if (! error_state) - retval = betainc (x, a, b); + if (! error_state) + retval = betainc (x, a, b); + } } - else + } + else + { + NDArray a = a_arg.array_value (); + + if (! error_state) { - NDArray b = b_arg.array_value (); + if (b_arg.is_scalar_type ()) + { + double b = b_arg.double_value (); - if (! error_state) - retval = betainc (x, a, b); + if (! error_state) + retval = betainc (x, a, b); + } + else + { + NDArray b = b_arg.array_value (); + + if (! error_state) + retval = betainc (x, a, b); + } } } } diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/bsxfun.cc --- a/src/DLD-FUNCTIONS/bsxfun.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/bsxfun.cc Sun Apr 27 22:34:17 2008 +0200 @@ -228,6 +228,8 @@ BSXDEF(NDArray); BSXDEF(ComplexNDArray); + BSXDEF(FloatNDArray); + BSXDEF(FloatComplexNDArray); BSXDEF(boolNDArray); BSXDEF(int8NDArray); BSXDEF(int16NDArray); @@ -290,6 +292,22 @@ result_ComplexNDArray.resize (dvc); } } + else if (result_type == "single") + { + if (tmp(0).is_real_type ()) + { + have_FloatNDArray = true; + result_FloatNDArray = tmp(0).float_array_value (); + result_FloatNDArray.resize (dvc); + } + else + { + have_ComplexNDArray = true; + result_ComplexNDArray = + tmp(0).complex_array_value (); + result_ComplexNDArray.resize (dvc); + } + } else if BSXINIT(boolNDArray, "logical", bool) else if BSXINIT(int8NDArray, "int8", int8) else if BSXINIT(int16NDArray, "int16", int16) @@ -310,9 +328,61 @@ { update_index (ra_idx, dvc, i); - if (have_NDArray) + if 
(have_FloatNDArray || + have_FloatComplexNDArray) { - if (tmp(0).class_name () != "double") + if (! tmp(0).is_float_type ()) + { + if (have_FloatNDArray) + { + have_FloatNDArray = false; + C = result_FloatNDArray; + } + else + { + have_FloatComplexNDArray = false; + C = result_FloatComplexNDArray; + } + C = do_cat_op (C, tmp(0), ra_idx); + } + else if (tmp(0).is_double_type ()) + { + if (tmp(0).is_complex_type () && + have_FloatNDArray) + { + result_ComplexNDArray = + ComplexNDArray (result_FloatNDArray); + result_ComplexNDArray.insert + (tmp(0).complex_array_value(), ra_idx); + have_FloatComplexNDArray = false; + have_ComplexNDArray = true; + } + else + { + result_NDArray = + NDArray (result_FloatNDArray); + result_NDArray.insert + (tmp(0).array_value(), ra_idx); + have_FloatNDArray = false; + have_NDArray = true; + } + } + else if (tmp(0).is_real_type ()) + result_FloatNDArray.insert + (tmp(0).float_array_value(), ra_idx); + else + { + result_FloatComplexNDArray = + FloatComplexNDArray (result_FloatNDArray); + result_FloatComplexNDArray.insert + (tmp(0).float_complex_array_value(), ra_idx); + have_FloatNDArray = false; + have_FloatComplexNDArray = true; + } + } + else if (have_NDArray) + { + if (! 
tmp(0).is_float_type ()) { have_NDArray = false; C = result_NDArray; @@ -368,6 +438,8 @@ if BSXEND(NDArray) else if BSXEND(ComplexNDArray) + else if BSXEND(FloatNDArray) + else if BSXEND(FloatComplexNDArray) else if BSXEND(boolNDArray) else if BSXEND(int8NDArray) else if BSXEND(int16NDArray) diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/chol.cc --- a/src/DLD-FUNCTIONS/chol.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/chol.cc Sun Apr 27 22:34:17 2008 +0200 @@ -31,6 +31,8 @@ #include "CmplxCHOL.h" #include "dbleCHOL.h" +#include "fCmplxCHOL.h" +#include "floatCHOL.h" #include "SparseCmplxCHOL.h" #include "SparsedbleCHOL.h" #include "oct-spparms.h" @@ -226,6 +228,51 @@ else gripe_wrong_type_arg ("chol", arg); } + else if (arg.is_single_type ()) + { + if (arg.is_real_type ()) + { + FloatMatrix m = arg.float_matrix_value (); + + if (! error_state) + { + octave_idx_type info; + FloatCHOL fact (m, info); + if (nargout == 2 || info == 0) + { + retval(1) = static_cast (info); + if (LLt) + retval(0) = fact.chol_matrix ().transpose (); + else + retval(0) = fact.chol_matrix (); + } + else + error ("chol: matrix not positive definite"); + } + } + else if (arg.is_complex_type ()) + { + FloatComplexMatrix m = arg.float_complex_matrix_value (); + + if (! 
error_state) + { + octave_idx_type info; + FloatComplexCHOL fact (m, info); + if (nargout == 2 || info == 0) + { + retval(1) = static_cast (info); + if (LLt) + retval(0) = fact.chol_matrix ().hermitian (); + else + retval(0) = fact.chol_matrix (); + } + else + error ("chol: matrix not positive definite"); + } + } + else + gripe_wrong_type_arg ("chol", arg); + } else { if (arg.is_real_type ()) diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/conv2.cc --- a/src/DLD-FUNCTIONS/conv2.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/conv2.cc Sun Apr 27 22:34:17 2008 +0200 @@ -40,6 +40,12 @@ extern MArray2 conv2 (MArray&, MArray&, MArray2&, Shape); + +extern MArray2 +conv2 (MArray&, MArray&, MArray2&, Shape); + +extern MArray2 +conv2 (MArray&, MArray&, MArray2&, Shape); #endif template @@ -142,6 +148,12 @@ extern MArray2 conv2 (MArray2&, MArray2&, Shape); + +extern MArray2 +conv2 (MArray2&, MArray2&, Shape); + +extern MArray2 +conv2 (MArray2&, MArray2&, Shape); #endif template @@ -304,46 +316,98 @@ return retval; } - if (args(0).is_complex_type () - || args(1).is_complex_type () - || args(2).is_complex_type ()) - { - ComplexColumnVector v1 (args(0).complex_vector_value ()); - ComplexColumnVector v2 (args(1).complex_vector_value ()); - ComplexMatrix a (args(2).complex_matrix_value ()); - ComplexMatrix c (conv2 (v1, v2, a, ishape)); - if (! error_state) - retval = c; - } + if (args(0).is_single_type () || + args(1).is_single_type () || + args(2).is_single_type ()) + { + if (args(0).is_complex_type () + || args(1).is_complex_type () + || args(2).is_complex_type ()) + { + FloatComplexColumnVector v1 (args(0).float_complex_vector_value ()); + FloatComplexColumnVector v2 (args(1).float_complex_vector_value ()); + FloatComplexMatrix a (args(2).float_complex_matrix_value ()); + FloatComplexMatrix c (conv2 (v1, v2, a, ishape)); + if (! 
error_state) + retval = c; + } + else + { + FloatColumnVector v1 (args(0).float_vector_value ()); + FloatColumnVector v2 (args(1).float_vector_value ()); + FloatMatrix a (args(2).float_matrix_value ()); + FloatMatrix c (conv2 (v1, v2, a, ishape)); + if (! error_state) + retval = c; + } + } else - { - ColumnVector v1 (args(0).vector_value ()); - ColumnVector v2 (args(1).vector_value ()); - Matrix a (args(2).matrix_value ()); - Matrix c (conv2 (v1, v2, a, ishape)); - if (! error_state) - retval = c; - } + { + if (args(0).is_complex_type () + || args(1).is_complex_type () + || args(2).is_complex_type ()) + { + ComplexColumnVector v1 (args(0).complex_vector_value ()); + ComplexColumnVector v2 (args(1).complex_vector_value ()); + ComplexMatrix a (args(2).complex_matrix_value ()); + ComplexMatrix c (conv2 (v1, v2, a, ishape)); + if (! error_state) + retval = c; + } + else + { + ColumnVector v1 (args(0).vector_value ()); + ColumnVector v2 (args(1).vector_value ()); + Matrix a (args(2).matrix_value ()); + Matrix c (conv2 (v1, v2, a, ishape)); + if (! error_state) + retval = c; + } + } } // if (separable) else { - if (args(0).is_complex_type () - || args(1).is_complex_type ()) - { - ComplexMatrix a (args(0).complex_matrix_value ()); - ComplexMatrix b (args(1).complex_matrix_value ()); - ComplexMatrix c (conv2 (a, b, ishape)); - if (! error_state) - retval = c; - } + if (args(0).is_single_type () || + args(1).is_single_type ()) + { + if (args(0).is_complex_type () + || args(1).is_complex_type ()) + { + FloatComplexMatrix a (args(0).float_complex_matrix_value ()); + FloatComplexMatrix b (args(1).float_complex_matrix_value ()); + FloatComplexMatrix c (conv2 (a, b, ishape)); + if (! error_state) + retval = c; + } + else + { + FloatMatrix a (args(0).float_matrix_value ()); + FloatMatrix b (args(1).float_matrix_value ()); + FloatMatrix c (conv2 (a, b, ishape)); + if (! 
error_state) + retval = c; + } + } else - { - Matrix a (args(0).matrix_value ()); - Matrix b (args(1).matrix_value ()); - Matrix c (conv2 (a, b, ishape)); - if (! error_state) - retval = c; - } + { + if (args(0).is_complex_type () + || args(1).is_complex_type ()) + { + ComplexMatrix a (args(0).complex_matrix_value ()); + ComplexMatrix b (args(1).complex_matrix_value ()); + ComplexMatrix c (conv2 (a, b, ishape)); + if (! error_state) + retval = c; + } + else + { + Matrix a (args(0).matrix_value ()); + Matrix b (args(1).matrix_value ()); + Matrix c (conv2 (a, b, ishape)); + if (! error_state) + retval = c; + } + } } // if (separable) diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/det.cc --- a/src/DLD-FUNCTIONS/det.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/det.cc Sun Apr 27 22:34:17 2008 +0200 @@ -27,6 +27,8 @@ #include "CmplxDET.h" #include "dbleDET.h" +#include "fCmplxDET.h" +#include "floatDET.h" #include "defun-dld.h" #include "error.h" @@ -75,74 +77,114 @@ return retval; } - if (arg.is_real_type ()) + + if (arg.is_single_type ()) { - octave_idx_type info; - double rcond = 0.0; - // Always compute rcond, so we can detect numerically - // singular matrices. - if (arg.is_sparse_type ()) + if (arg.is_real_type ()) { - SparseMatrix m = arg.sparse_matrix_value (); + octave_idx_type info; + float rcond = 0.0; + // Always compute rcond, so we can detect numerically + // singular matrices. + FloatMatrix m = arg.float_matrix_value (); if (! error_state) { - DET det = m.determinant (info, rcond); + FloatDET det = m.determinant (info, rcond); retval(1) = rcond; - volatile double xrcond = rcond; - xrcond += 1.0; - retval(0) = ((info == -1 || xrcond == 1.0) ? 0.0 : det.value ()); - } - } - else - { - Matrix m = arg.matrix_value (); - if (! error_state) - { - DET det = m.determinant (info, rcond); - retval(1) = rcond; - volatile double xrcond = rcond; + volatile float xrcond = rcond; xrcond += 1.0; retval(0) = ((info == -1 || xrcond == 1.0) ? 
0.0 : det.value ()); } } - } - else if (arg.is_complex_type ()) - { - octave_idx_type info; - double rcond = 0.0; - // Always compute rcond, so we can detect numerically - // singular matrices. - if (arg.is_sparse_type ()) + else if (arg.is_complex_type ()) { - SparseComplexMatrix m = arg.sparse_complex_matrix_value (); + octave_idx_type info; + float rcond = 0.0; + // Always compute rcond, so we can detect numerically + // singular matrices. + FloatComplexMatrix m = arg.float_complex_matrix_value (); if (! error_state) { - ComplexDET det = m.determinant (info, rcond); + FloatComplexDET det = m.determinant (info, rcond); retval(1) = rcond; - volatile double xrcond = rcond; + volatile float xrcond = rcond; xrcond += 1.0; retval(0) = ((info == -1 || xrcond == 1.0) ? Complex (0.0) : det.value ()); - } - } - else - { - ComplexMatrix m = arg.complex_matrix_value (); - if (! error_state) - { - ComplexDET det = m.determinant (info, rcond); - retval(1) = rcond; - volatile double xrcond = rcond; - xrcond += 1.0; - retval(0) = ((info == -1 || xrcond == 1.0) - ? Complex (0.0) : det.value ()); - + } } } else - gripe_wrong_type_arg ("det", arg); + { + if (arg.is_real_type ()) + { + octave_idx_type info; + double rcond = 0.0; + // Always compute rcond, so we can detect numerically + // singular matrices. + if (arg.is_sparse_type ()) + { + SparseMatrix m = arg.sparse_matrix_value (); + if (! error_state) + { + DET det = m.determinant (info, rcond); + retval(1) = rcond; + volatile double xrcond = rcond; + xrcond += 1.0; + retval(0) = ((info == -1 || xrcond == 1.0) ? 0.0 : det.value ()); + } + } + else + { + Matrix m = arg.matrix_value (); + if (! error_state) + { + DET det = m.determinant (info, rcond); + retval(1) = rcond; + volatile double xrcond = rcond; + xrcond += 1.0; + retval(0) = ((info == -1 || xrcond == 1.0) ? 
0.0 : det.value ()); + } + } + } + else if (arg.is_complex_type ()) + { + octave_idx_type info; + double rcond = 0.0; + // Always compute rcond, so we can detect numerically + // singular matrices. + if (arg.is_sparse_type ()) + { + SparseComplexMatrix m = arg.sparse_complex_matrix_value (); + if (! error_state) + { + ComplexDET det = m.determinant (info, rcond); + retval(1) = rcond; + volatile double xrcond = rcond; + xrcond += 1.0; + retval(0) = ((info == -1 || xrcond == 1.0) + ? Complex (0.0) : det.value ()); + } + } + else + { + ComplexMatrix m = arg.complex_matrix_value (); + if (! error_state) + { + ComplexDET det = m.determinant (info, rcond); + retval(1) = rcond; + volatile double xrcond = rcond; + xrcond += 1.0; + retval(0) = ((info == -1 || xrcond == 1.0) + ? Complex (0.0) : det.value ()); + } + } + } + else + gripe_wrong_type_arg ("det", arg); + } return retval; } diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/eig.cc --- a/src/DLD-FUNCTIONS/eig.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/eig.cc Sun Apr 27 22:34:17 2008 +0200 @@ -26,6 +26,7 @@ #endif #include "EIG.h" +#include "fEIG.h" #include "defun-dld.h" #include "error.h" @@ -75,46 +76,92 @@ Matrix tmp; ComplexMatrix ctmp; - EIG result; + FloatMatrix ftmp; + FloatComplexMatrix fctmp; + + if (arg.is_single_type ()) + { + FloatEIG result; - if (arg.is_real_type ()) - { - tmp = arg.matrix_value (); + if (arg.is_real_type ()) + { + ftmp = arg.float_matrix_value (); + + if (error_state) + return retval; + else + result = FloatEIG (ftmp, nargout > 1); + } + else if (arg.is_complex_type ()) + { + fctmp = arg.float_complex_matrix_value (); - if (error_state) - return retval; - else - result = EIG (tmp, nargout > 1); - } - else if (arg.is_complex_type ()) - { - ctmp = arg.complex_matrix_value (); + if (error_state) + return retval; + else + result = FloatEIG (fctmp, nargout > 1); + } - if (error_state) - return retval; - else - result = EIG (ctmp, nargout > 1); + if (! 
error_state) + { + if (nargout == 0 || nargout == 1) + { + retval(0) = result.eigenvalues (); + } + else + { + // Blame it on Matlab. + + FloatComplexDiagMatrix d (result.eigenvalues ()); + + retval(1) = d; + retval(0) = result.eigenvectors (); + } + } } else { - gripe_wrong_type_arg ("eig", tmp); - return retval; - } + EIG result; + + if (arg.is_real_type ()) + { + tmp = arg.matrix_value (); - if (! error_state) - { - if (nargout == 0 || nargout == 1) + if (error_state) + return retval; + else + result = EIG (tmp, nargout > 1); + } + else if (arg.is_complex_type ()) { - retval(0) = result.eigenvalues (); + ctmp = arg.complex_matrix_value (); + + if (error_state) + return retval; + else + result = EIG (ctmp, nargout > 1); } else { - // Blame it on Matlab. + gripe_wrong_type_arg ("eig", tmp); + return retval; + } - ComplexDiagMatrix d (result.eigenvalues ()); + if (! error_state) + { + if (nargout == 0 || nargout == 1) + { + retval(0) = result.eigenvalues (); + } + else + { + // Blame it on Matlab. 
- retval(1) = d; - retval(0) = result.eigenvectors (); + ComplexDiagMatrix d (result.eigenvalues ()); + + retval(1) = d; + retval(0) = result.eigenvectors (); + } } } diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/expm.cc --- a/src/DLD-FUNCTIONS/expm.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/expm.cc Sun Apr 27 22:34:17 2008 +0200 @@ -134,12 +134,16 @@ octave_idx_type nr = arg.rows (); octave_idx_type nc = arg.columns (); + bool isfloat = arg.is_single_type (); int arg_is_empty = empty_arg ("expm", nr, nc); if (arg_is_empty < 0) return retval; if (arg_is_empty > 0) - return octave_value (Matrix ()); + if (isfloat) + return octave_value (FloatMatrix ()); + else + return octave_value (Matrix ()); if (nr != nc) { @@ -147,27 +151,51 @@ return retval; } - if (arg.is_real_type ()) + if (isfloat) { - Matrix m = arg.matrix_value (); + if (arg.is_real_type ()) + { + FloatMatrix m = arg.float_matrix_value (); - if (error_state) - return retval; - else - retval = m.expm (); - } - else if (arg.is_complex_type ()) - { - ComplexMatrix m = arg.complex_matrix_value (); + if (error_state) + return retval; + else + retval = m.expm (); + } + else if (arg.is_complex_type ()) + { + FloatComplexMatrix m = arg.float_complex_matrix_value (); - if (error_state) - return retval; - else - retval = m.expm (); + if (error_state) + return retval; + else + retval = m.expm (); + } } else { - gripe_wrong_type_arg ("expm", arg); + if (arg.is_real_type ()) + { + Matrix m = arg.matrix_value (); + + if (error_state) + return retval; + else + retval = m.expm (); + } + else if (arg.is_complex_type ()) + { + ComplexMatrix m = arg.complex_matrix_value (); + + if (error_state) + return retval; + else + retval = m.expm (); + } + else + { + gripe_wrong_type_arg ("expm", arg); + } } return retval; diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/fft.cc --- a/src/DLD-FUNCTIONS/fft.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/fft.cc Sun Apr 27 22:34:17 2008 +0200 @@ 
-117,29 +117,55 @@ if (dims.any_zero () || n_points == 0) return octave_value (NDArray (dims)); - if (arg.is_real_type ()) + if (arg.is_single_type ()) { - NDArray nda = arg.array_value (); + if (arg.is_real_type ()) + { + FloatNDArray nda = arg.float_array_value (); - if (! error_state) - { - nda.resize (dims, 0.0); - retval = (type != 0 ? nda.ifourier (dim) : nda.fourier (dim)); + if (! error_state) + { + nda.resize (dims, 0.0); + retval = (type != 0 ? nda.ifourier (dim) : nda.fourier (dim)); + } } - } - else if (arg.is_complex_type ()) - { - ComplexNDArray cnda = arg.complex_array_value (); + else + { + FloatComplexNDArray cnda = arg.float_complex_array_value (); - if (! error_state) - { - cnda.resize (dims, 0.0); - retval = (type != 0 ? cnda.ifourier (dim) : cnda.fourier (dim)); + if (! error_state) + { + cnda.resize (dims, 0.0); + retval = (type != 0 ? cnda.ifourier (dim) : cnda.fourier (dim)); + } } } else { - gripe_wrong_type_arg (fcn, arg); + if (arg.is_real_type ()) + { + NDArray nda = arg.array_value (); + + if (! error_state) + { + nda.resize (dims, 0.0); + retval = (type != 0 ? nda.ifourier (dim) : nda.fourier (dim)); + } + } + else if (arg.is_complex_type ()) + { + ComplexNDArray cnda = arg.complex_array_value (); + + if (! error_state) + { + cnda.resize (dims, 0.0); + retval = (type != 0 ? cnda.ifourier (dim) : cnda.fourier (dim)); + } + } + else + { + gripe_wrong_type_arg (fcn, arg); + } } return retval; diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/fft2.cc --- a/src/DLD-FUNCTIONS/fft2.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/fft2.cc Sun Apr 27 22:34:17 2008 +0200 @@ -108,29 +108,55 @@ if (dims.all_zero () || n_rows == 0 || n_cols == 0) return octave_value (Matrix ()); - if (arg.is_real_type ()) + if (arg.is_single_type ()) { - NDArray nda = arg.array_value (); + if (arg.is_real_type ()) + { + FloatNDArray nda = arg.float_array_value (); - if (! error_state) - { - nda.resize (dims, 0.0); - retval = (type != 0 ? 
nda.ifourier2d () : nda.fourier2d ()); + if (! error_state) + { + nda.resize (dims, 0.0); + retval = (type != 0 ? nda.ifourier2d () : nda.fourier2d ()); + } } - } - else if (arg.is_complex_type ()) - { - ComplexNDArray cnda = arg.complex_array_value (); + else + { + FloatComplexNDArray cnda = arg.float_complex_array_value (); - if (! error_state) - { - cnda.resize (dims, 0.0); - retval = (type != 0 ? cnda.ifourier2d () : cnda.fourier2d ()); + if (! error_state) + { + cnda.resize (dims, 0.0); + retval = (type != 0 ? cnda.ifourier2d () : cnda.fourier2d ()); + } } } else { - gripe_wrong_type_arg (fcn, arg); + if (arg.is_real_type ()) + { + NDArray nda = arg.array_value (); + + if (! error_state) + { + nda.resize (dims, 0.0); + retval = (type != 0 ? nda.ifourier2d () : nda.fourier2d ()); + } + } + else if (arg.is_complex_type ()) + { + ComplexNDArray cnda = arg.complex_array_value (); + + if (! error_state) + { + cnda.resize (dims, 0.0); + retval = (type != 0 ? cnda.ifourier2d () : cnda.fourier2d ()); + } + } + else + { + gripe_wrong_type_arg (fcn, arg); + } } return retval; diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/fftn.cc --- a/src/DLD-FUNCTIONS/fftn.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/fftn.cc Sun Apr 27 22:34:17 2008 +0200 @@ -90,29 +90,55 @@ if (dims.all_zero ()) return octave_value (Matrix ()); - if (arg.is_real_type ()) + if (arg.is_single_type ()) { - NDArray nda = arg.array_value (); + if (arg.is_real_type ()) + { + FloatNDArray nda = arg.float_array_value (); - if (! error_state) - { - nda.resize (dims, 0.0); - retval = (type != 0 ? nda.ifourierNd () : nda.fourierNd ()); + if (! error_state) + { + nda.resize (dims, 0.0); + retval = (type != 0 ? nda.ifourierNd () : nda.fourierNd ()); + } } - } - else if (arg.is_complex_type ()) - { - ComplexNDArray cnda = arg.complex_array_value (); + else + { + FloatComplexNDArray cnda = arg.float_complex_array_value (); - if (! 
error_state) - { - cnda.resize (dims, 0.0); - retval = (type != 0 ? cnda.ifourierNd () : cnda.fourierNd ()); + if (! error_state) + { + cnda.resize (dims, 0.0); + retval = (type != 0 ? cnda.ifourierNd () : cnda.fourierNd ()); + } } } else { - gripe_wrong_type_arg (fcn, arg); + if (arg.is_real_type ()) + { + NDArray nda = arg.array_value (); + + if (! error_state) + { + nda.resize (dims, 0.0); + retval = (type != 0 ? nda.ifourierNd () : nda.fourierNd ()); + } + } + else if (arg.is_complex_type ()) + { + ComplexNDArray cnda = arg.complex_array_value (); + + if (! error_state) + { + cnda.resize (dims, 0.0); + retval = (type != 0 ? cnda.ifourierNd () : cnda.fourierNd ()); + } + } + else + { + gripe_wrong_type_arg (fcn, arg); + } } return retval; diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/fftw.cc --- a/src/DLD-FUNCTIONS/fftw.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/fftw.cc Sun Apr 27 22:34:17 2008 +0200 @@ -143,27 +143,42 @@ arg1.begin (), tolower); octave_fftw_planner::FftwMethod meth = octave_fftw_planner::UNKNOWN; + octave_float_fftw_planner::FftwMethod methf + = octave_float_fftw_planner::UNKNOWN; if (arg1 == "estimate") - meth = fftw_planner.method - (octave_fftw_planner::ESTIMATE); + { + meth = octave_fftw_planner::ESTIMATE; + methf = octave_float_fftw_planner::ESTIMATE; + } else if (arg1 == "measure") - meth = fftw_planner.method - (octave_fftw_planner::MEASURE); + { + meth = octave_fftw_planner::MEASURE; + methf = octave_float_fftw_planner::MEASURE; + } else if (arg1 == "patient") - meth = fftw_planner.method - (octave_fftw_planner::PATIENT); + { + meth = octave_fftw_planner::PATIENT; + methf = octave_float_fftw_planner::PATIENT; + } else if (arg1 == "exhaustive") - meth = fftw_planner.method - (octave_fftw_planner::EXHAUSTIVE); + { + meth = octave_fftw_planner::EXHAUSTIVE; + methf = octave_float_fftw_planner::EXHAUSTIVE; + } else if (arg1 == "hybrid") - meth = fftw_planner.method - (octave_fftw_planner::HYBRID); + { + meth = 
octave_fftw_planner::HYBRID; + methf = octave_float_fftw_planner::HYBRID; + } else error ("unrecognized planner method"); if (!error_state) { + meth = fftw_planner.method (meth); + float_fftw_planner.method (methf); + if (meth == octave_fftw_planner::MEASURE) retval = octave_value ("measure"); else if (meth == octave_fftw_planner::PATIENT) @@ -191,7 +206,19 @@ free (str); } else if (arg0 == "swisdom") - error ("single precision wisdom is not supported"); + { + char *str = fftwf_export_wisdom_to_string (); + + if (arg1.length() < 1) + fftwf_forget_wisdom (); + else if (! fftwf_import_wisdom_from_string (arg1.c_str())) + error ("could not import supplied wisdom"); + + if (!error_state) + retval = octave_value (std::string (str)); + + free (str); + } else error ("unrecognized argument"); } @@ -221,7 +248,11 @@ free (str); } else if (arg0 == "swisdom") - error ("single precision wisdom is not supported"); + { + char *str = fftwf_export_wisdom_to_string (); + retval = octave_value (std::string (str)); + free (str); + } else error ("unrecognized argument"); } diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/filter.cc --- a/src/DLD-FUNCTIONS/filter.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/filter.cc Sun Apr 27 22:34:17 2008 +0200 @@ -45,6 +45,12 @@ extern MArrayN filter (MArray&, MArray&, MArrayN&, int dim); + +extern MArrayN +filter (MArray&, MArray&, MArrayN&, int dim); + +extern MArrayN +filter (MArray&, MArray&, MArrayN&, int dim); #endif template @@ -65,7 +71,7 @@ T norm = a (0); - if (norm == 0.0) + if (norm == static_cast(0.0)) { error ("filter: the first element of a must be non-zero"); return y; @@ -111,7 +117,7 @@ } } - if (norm != 1.0) + if (norm != static_cast(1.0)) { a = a / norm; b = b / norm; @@ -225,6 +231,14 @@ extern MArrayN filter (MArray&, MArray&, MArrayN&, MArrayN&, int dim); + +extern MArrayN +filter (MArray&, MArray&, MArrayN&, + MArrayN&, int dim); + +extern MArrayN +filter (MArray&, MArray&, MArrayN&, + MArrayN&, int dim); 
#endif template @@ -397,122 +411,247 @@ dim = 0; } + bool isfloat = (args(0).is_single_type () + || args(1).is_single_type () + || args(2).is_single_type () + || (nargin >= 4 && args(3).is_single_type ())); + if (args(0).is_complex_type () || args(1).is_complex_type () || args(2).is_complex_type () || (nargin >= 4 && args(3).is_complex_type ())) { - ComplexColumnVector b (args(0).complex_vector_value ()); - ComplexColumnVector a (args(1).complex_vector_value ()); - - ComplexNDArray x (args(2).complex_array_value ()); - - if (! error_state) + if (isfloat) { - ComplexNDArray si; - - if (nargin == 3 || args(3).is_empty ()) - { - octave_idx_type a_len = a.length (); - octave_idx_type b_len = b.length (); - - octave_idx_type si_len = (a_len > b_len ? a_len : b_len) - 1; + FloatComplexColumnVector b (args(0).float_complex_vector_value ()); + FloatComplexColumnVector a (args(1).float_complex_vector_value ()); - dim_vector si_dims = x.dims (); - for (int i = dim; i > 0; i--) - si_dims(i) = si_dims(i-1); - si_dims(0) = si_len; - - si.resize (si_dims, 0.0); - } - else - { - dim_vector si_dims = args (3).dims (); - bool si_is_vector = true; - for (int i = 0; i < si_dims.length (); i++) - if (si_dims(i) != 1 && si_dims(i) < si_dims.numel ()) - { - si_is_vector = false; - break; - } - - si = args(3).complex_array_value (); - - if (si_is_vector) - si = si.reshape (dim_vector (si.numel (), 1)); - } + FloatComplexNDArray x (args(2).float_complex_array_value ()); if (! error_state) { - ComplexNDArray y (filter (b, a, x, si, dim)); + FloatComplexNDArray si; + + if (nargin == 3 || args(3).is_empty ()) + { + octave_idx_type a_len = a.length (); + octave_idx_type b_len = b.length (); + + octave_idx_type si_len = (a_len > b_len ? 
a_len : b_len) - 1; + + dim_vector si_dims = x.dims (); + for (int i = dim; i > 0; i--) + si_dims(i) = si_dims(i-1); + si_dims(0) = si_len; - if (nargout == 2) - retval(1) = si; + si.resize (si_dims, 0.0); + } + else + { + dim_vector si_dims = args (3).dims (); + bool si_is_vector = true; + for (int i = 0; i < si_dims.length (); i++) + if (si_dims(i) != 1 && si_dims(i) < si_dims.numel ()) + { + si_is_vector = false; + break; + } + + si = args(3).float_complex_array_value (); - retval(0) = y; + if (si_is_vector) + si = si.reshape (dim_vector (si.numel (), 1)); + } + + if (! error_state) + { + FloatComplexNDArray y (filter (b, a, x, si, dim)); + + if (nargout == 2) + retval(1) = si; + + retval(0) = y; + } + else + error (errmsg); } else error (errmsg); } else - error (errmsg); + { + ComplexColumnVector b (args(0).complex_vector_value ()); + ComplexColumnVector a (args(1).complex_vector_value ()); + + ComplexNDArray x (args(2).complex_array_value ()); + + if (! error_state) + { + ComplexNDArray si; + + if (nargin == 3 || args(3).is_empty ()) + { + octave_idx_type a_len = a.length (); + octave_idx_type b_len = b.length (); + + octave_idx_type si_len = (a_len > b_len ? a_len : b_len) - 1; + + dim_vector si_dims = x.dims (); + for (int i = dim; i > 0; i--) + si_dims(i) = si_dims(i-1); + si_dims(0) = si_len; + + si.resize (si_dims, 0.0); + } + else + { + dim_vector si_dims = args (3).dims (); + bool si_is_vector = true; + for (int i = 0; i < si_dims.length (); i++) + if (si_dims(i) != 1 && si_dims(i) < si_dims.numel ()) + { + si_is_vector = false; + break; + } + + si = args(3).complex_array_value (); + + if (si_is_vector) + si = si.reshape (dim_vector (si.numel (), 1)); + } + + if (! 
error_state) + { + ComplexNDArray y (filter (b, a, x, si, dim)); + + if (nargout == 2) + retval(1) = si; + + retval(0) = y; + } + else + error (errmsg); + } + else + error (errmsg); + } } else { - ColumnVector b (args(0).vector_value ()); - ColumnVector a (args(1).vector_value ()); - - NDArray x (args(2).array_value ()); - - if (! error_state) + if (isfloat) { - NDArray si; - - if (nargin == 3 || args(3).is_empty ()) - { - octave_idx_type a_len = a.length (); - octave_idx_type b_len = b.length (); - - octave_idx_type si_len = (a_len > b_len ? a_len : b_len) - 1; + FloatColumnVector b (args(0).float_vector_value ()); + FloatColumnVector a (args(1).float_vector_value ()); - dim_vector si_dims = x.dims (); - for (int i = dim; i > 0; i--) - si_dims(i) = si_dims(i-1); - si_dims(0) = si_len; - - si.resize (si_dims, 0.0); - } - else - { - dim_vector si_dims = args (3).dims (); - bool si_is_vector = true; - for (int i = 0; i < si_dims.length (); i++) - if (si_dims(i) != 1 && si_dims(i) < si_dims.numel ()) - { - si_is_vector = false; - break; - } - - si = args(3).array_value (); - - if (si_is_vector) - si = si.reshape (dim_vector (si.numel (), 1)); - } + FloatNDArray x (args(2).float_array_value ()); if (! error_state) { - NDArray y (filter (b, a, x, si, dim)); + FloatNDArray si; + + if (nargin == 3 || args(3).is_empty ()) + { + octave_idx_type a_len = a.length (); + octave_idx_type b_len = b.length (); + + octave_idx_type si_len = (a_len > b_len ? 
a_len : b_len) - 1; + + dim_vector si_dims = x.dims (); + for (int i = dim; i > 0; i--) + si_dims(i) = si_dims(i-1); + si_dims(0) = si_len; - if (nargout == 2) - retval(1) = si; + si.resize (si_dims, 0.0); + } + else + { + dim_vector si_dims = args (3).dims (); + bool si_is_vector = true; + for (int i = 0; i < si_dims.length (); i++) + if (si_dims(i) != 1 && si_dims(i) < si_dims.numel ()) + { + si_is_vector = false; + break; + } + + si = args(3).float_array_value (); - retval(0) = y; + if (si_is_vector) + si = si.reshape (dim_vector (si.numel (), 1)); + } + + if (! error_state) + { + FloatNDArray y (filter (b, a, x, si, dim)); + + if (nargout == 2) + retval(1) = si; + + retval(0) = y; + } + else + error (errmsg); } else error (errmsg); } else - error (errmsg); + { + ColumnVector b (args(0).vector_value ()); + ColumnVector a (args(1).vector_value ()); + + NDArray x (args(2).array_value ()); + + if (! error_state) + { + NDArray si; + + if (nargin == 3 || args(3).is_empty ()) + { + octave_idx_type a_len = a.length (); + octave_idx_type b_len = b.length (); + + octave_idx_type si_len = (a_len > b_len ? a_len : b_len) - 1; + + dim_vector si_dims = x.dims (); + for (int i = dim; i > 0; i--) + si_dims(i) = si_dims(i-1); + si_dims(0) = si_len; + + si.resize (si_dims, 0.0); + } + else + { + dim_vector si_dims = args (3).dims (); + bool si_is_vector = true; + for (int i = 0; i < si_dims.length (); i++) + if (si_dims(i) != 1 && si_dims(i) < si_dims.numel ()) + { + si_is_vector = false; + break; + } + + si = args(3).array_value (); + + if (si_is_vector) + si = si.reshape (dim_vector (si.numel (), 1)); + } + + if (! 
error_state) + { + NDArray y (filter (b, a, x, si, dim)); + + if (nargout == 2) + retval(1) = si; + + retval(0) = y; + } + else + error (errmsg); + } + else + error (errmsg); + } } return retval; @@ -532,6 +671,20 @@ template MArrayN filter (MArray&, MArray&, MArrayN&, int dim); +template MArrayN +filter (MArray&, MArray&, MArrayN&, + MArrayN&, int dim); + +template MArrayN +filter (MArray&, MArray&, MArrayN&, int dim); + +template MArrayN +filter (MArray&, MArray&, MArrayN&, + MArrayN&, int dim); + +template MArrayN +filter (MArray&, MArray&, MArrayN&, int dim); + /* ;;; Local Variables: *** ;;; mode: C++ *** diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/find.cc --- a/src/DLD-FUNCTIONS/find.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/find.cc Sun Apr 27 22:34:17 2008 +0200 @@ -62,7 +62,7 @@ { OCTAVE_QUIT; - if (nda(k) != 0.0) + if (nda(k) != static_cast (0.0)) { end_el = k; if (start_el == -1) @@ -125,7 +125,7 @@ { OCTAVE_QUIT; - if (nda(k) != 0.0) + if (nda(k) != static_cast (0.0)) { idx(count) = k + 1; @@ -178,6 +178,12 @@ template octave_value_list find_nonzero_elem_idx (const Array&, int, octave_idx_type, int); +template octave_value_list find_nonzero_elem_idx (const Array&, int, + octave_idx_type, int); + +template octave_value_list find_nonzero_elem_idx (const Array&, + int, octave_idx_type, int); + template octave_value_list find_nonzero_elem_idx (const Sparse& v, int nargout, @@ -458,33 +464,55 @@ } else { - if (arg.is_real_type ()) + if (arg.is_single_type ()) { - NDArray nda = arg.array_value (); - - if (! error_state) - retval = find_nonzero_elem_idx (nda, nargout, - n_to_find, direction); - } - else if (arg.is_complex_type ()) - { - ComplexNDArray cnda = arg.complex_array_value (); + if (arg.is_real_type ()) + { + FloatNDArray nda = arg.float_array_value (); - if (! 
error_state) - retval = find_nonzero_elem_idx (cnda, nargout, - n_to_find, direction); - } - else if (arg.is_string ()) - { - charNDArray cnda = arg.char_array_value (); + if (! error_state) + retval = find_nonzero_elem_idx (nda, nargout, + n_to_find, direction); + } + else if (arg.is_complex_type ()) + { + FloatComplexNDArray cnda = arg.float_complex_array_value (); - if (! error_state) - retval = find_nonzero_elem_idx (cnda, nargout, - n_to_find, direction); + if (! error_state) + retval = find_nonzero_elem_idx (cnda, nargout, + n_to_find, direction); + } } else { - gripe_wrong_type_arg ("find", arg); + if (arg.is_real_type ()) + { + NDArray nda = arg.array_value (); + + if (! error_state) + retval = find_nonzero_elem_idx (nda, nargout, + n_to_find, direction); + } + else if (arg.is_complex_type ()) + { + ComplexNDArray cnda = arg.complex_array_value (); + + if (! error_state) + retval = find_nonzero_elem_idx (cnda, nargout, + n_to_find, direction); + } + else if (arg.is_string ()) + { + charNDArray cnda = arg.char_array_value (); + + if (! error_state) + retval = find_nonzero_elem_idx (cnda, nargout, + n_to_find, direction); + } + else + { + gripe_wrong_type_arg ("find", arg); + } } } diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/gammainc.cc --- a/src/DLD-FUNCTIONS/gammainc.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/gammainc.cc Sun Apr 27 22:34:17 2008 +0200 @@ -74,47 +74,98 @@ octave_value x_arg = args(0); octave_value a_arg = args(1); - if (x_arg.is_scalar_type ()) + // FIXME Can we make a template version of the duplicated code below + if (x_arg.is_single_type () || a_arg.is_single_type ()) { - double x = x_arg.double_value (); + if (x_arg.is_scalar_type ()) + { + float x = x_arg.float_value (); - if (! error_state) - { - if (a_arg.is_scalar_type ()) + if (! error_state) { - double a = a_arg.double_value (); + if (a_arg.is_scalar_type ()) + { + float a = a_arg.float_value (); + + if (! 
error_state) + retval = gammainc (x, a); + } + else + { + FloatNDArray a = a_arg.float_array_value (); - if (! error_state) - retval = gammainc (x, a); + if (! error_state) + retval = gammainc (x, a); + } } - else + } + else + { + FloatNDArray x = x_arg.float_array_value (); + + if (! error_state) { - NDArray a = a_arg.array_value (); + if (a_arg.is_scalar_type ()) + { + float a = a_arg.float_value (); - if (! error_state) - retval = gammainc (x, a); + if (! error_state) + retval = gammainc (x, a); + } + else + { + FloatNDArray a = a_arg.float_array_value (); + + if (! error_state) + retval = gammainc (x, a); + } } } } else { - NDArray x = x_arg.array_value (); - - if (! error_state) + if (x_arg.is_scalar_type ()) { - if (a_arg.is_scalar_type ()) + double x = x_arg.double_value (); + + if (! error_state) { - double a = a_arg.double_value (); + if (a_arg.is_scalar_type ()) + { + double a = a_arg.double_value (); + + if (! error_state) + retval = gammainc (x, a); + } + else + { + NDArray a = a_arg.array_value (); - if (! error_state) - retval = gammainc (x, a); + if (! error_state) + retval = gammainc (x, a); + } } - else + } + else + { + NDArray x = x_arg.array_value (); + + if (! error_state) { - NDArray a = a_arg.array_value (); + if (a_arg.is_scalar_type ()) + { + double a = a_arg.double_value (); - if (! error_state) - retval = gammainc (x, a); + if (! error_state) + retval = gammainc (x, a); + } + else + { + NDArray a = a_arg.array_value (); + + if (! error_state) + retval = gammainc (x, a); + } } } } diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/givens.cc --- a/src/DLD-FUNCTIONS/givens.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/givens.cc Sun Apr 27 22:34:17 2008 +0200 @@ -75,62 +75,128 @@ } else { - if (args(0).is_complex_type () || args(1).is_complex_type ()) + if (args(0).is_single_type () || args(1).is_single_type ()) { - Complex cx = args(0).complex_value (); - Complex cy = args(1).complex_value (); - - if (! 
error_state) + if (args(0).is_complex_type () || args(1).is_complex_type ()) { - ComplexMatrix result = Givens (cx, cy); + FloatComplex cx = args(0).float_complex_value (); + FloatComplex cy = args(1).float_complex_value (); if (! error_state) { - switch (nargout) + FloatComplexMatrix result = Givens (cx, cy); + + if (! error_state) { - case 0: - case 1: - retval(0) = result; - break; + switch (nargout) + { + case 0: + case 1: + retval(0) = result; + break; - case 2: - retval(1) = result (0, 1); - retval(0) = result (0, 0); - break; + case 2: + retval(1) = result (0, 1); + retval(0) = result (0, 0); + break; + + default: + error ("givens: invalid number of output arguments"); + break; + } + } + } + } + else + { + float x = args(0).float_value (); + float y = args(1).float_value (); - default: - error ("givens: invalid number of output arguments"); - break; + if (! error_state) + { + FloatMatrix result = Givens (x, y); + + if (! error_state) + { + switch (nargout) + { + case 0: + case 1: + retval(0) = result; + break; + + case 2: + retval(1) = result (0, 1); + retval(0) = result (0, 0); + break; + + default: + error ("givens: invalid number of output arguments"); + break; + } } } } } else { - double x = args(0).double_value (); - double y = args(1).double_value (); - - if (! error_state) + if (args(0).is_complex_type () || args(1).is_complex_type ()) { - Matrix result = Givens (x, y); + Complex cx = args(0).complex_value (); + Complex cy = args(1).complex_value (); if (! error_state) { - switch (nargout) + ComplexMatrix result = Givens (cx, cy); + + if (! 
error_state) { - case 0: - case 1: - retval(0) = result; - break; + switch (nargout) + { + case 0: + case 1: + retval(0) = result; + break; - case 2: - retval(1) = result (0, 1); - retval(0) = result (0, 0); - break; + case 2: + retval(1) = result (0, 1); + retval(0) = result (0, 0); + break; + + default: + error ("givens: invalid number of output arguments"); + break; + } + } + } + } + else + { + double x = args(0).double_value (); + double y = args(1).double_value (); - default: - error ("givens: invalid number of output arguments"); - break; + if (! error_state) + { + Matrix result = Givens (x, y); + + if (! error_state) + { + switch (nargout) + { + case 0: + case 1: + retval(0) = result; + break; + + case 2: + retval(1) = result (0, 1); + retval(0) = result (0, 0); + break; + + default: + error ("givens: invalid number of output arguments"); + break; + } } } } diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/hess.cc --- a/src/DLD-FUNCTIONS/hess.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/hess.cc Sun Apr 27 22:34:17 2008 +0200 @@ -27,6 +27,8 @@ #include "CmplxHESS.h" #include "dbleHESS.h" +#include "fCmplxHESS.h" +#include "floatHESS.h" #include "defun-dld.h" #include "error.h" @@ -89,51 +91,63 @@ return retval; } - if (arg.is_real_type ()) + if (arg.is_single_type ()) { - Matrix tmp = arg.matrix_value (); - - if (! error_state) + if (arg.is_real_type ()) { - HESS result (tmp); + FloatMatrix tmp = arg.float_matrix_value (); - if (nargout == 0 || nargout == 1) + if (! error_state) { - retval.resize (1); - retval(0) = result.hess_matrix (); - } - else - { - retval.resize (2); + FloatHESS result (tmp); + + retval(1) = result.hess_matrix (); retval(0) = result.unitary_hess_matrix (); - retval(1) = result.hess_matrix (); } } - } - else if (arg.is_complex_type ()) - { - ComplexMatrix ctmp = arg.complex_matrix_value (); - - if (! 
error_state) + else if (arg.is_complex_type ()) { - ComplexHESS result (ctmp); + FloatComplexMatrix ctmp = arg.float_complex_matrix_value (); - if (nargout == 0 || nargout == 1) + if (! error_state) { - retval.resize (1); - retval(0) = result.hess_matrix (); - } - else - { - retval.resize (2); + FloatComplexHESS result (ctmp); + + retval(1) = result.hess_matrix (); retval(0) = result.unitary_hess_matrix (); - retval(1) = result.hess_matrix (); } } } else { - gripe_wrong_type_arg ("hess", arg); + if (arg.is_real_type ()) + { + Matrix tmp = arg.matrix_value (); + + if (! error_state) + { + HESS result (tmp); + + retval(1) = result.hess_matrix (); + retval(0) = result.unitary_hess_matrix (); + } + } + else if (arg.is_complex_type ()) + { + ComplexMatrix ctmp = arg.complex_matrix_value (); + + if (! error_state) + { + ComplexHESS result (ctmp); + + retval(1) = result.hess_matrix (); + retval(0) = result.unitary_hess_matrix (); + } + } + else + { + gripe_wrong_type_arg ("hess", arg); + } } return retval; diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/inv.cc --- a/src/DLD-FUNCTIONS/inv.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/inv.cc Sun Apr 27 22:34:17 2008 +0200 @@ -77,62 +77,91 @@ octave_value result; octave_idx_type info; double rcond = 0.0; - if (arg.is_real_type ()) + float frcond = 0.0; + bool isfloat = arg.is_single_type (); + + if (isfloat) { - if (arg.is_sparse_type ()) + if (arg.is_real_type ()) { - SparseMatrix m = arg.sparse_matrix_value (); + FloatMatrix m = arg.float_matrix_value (); if (! error_state) { MatrixType mattyp = args(0).matrix_type (); - result = m.inverse (mattyp, info, rcond, 1); + result = m.inverse (mattyp, info, frcond, 1); args(0).matrix_type (mattyp); } } - else + else if (arg.is_complex_type ()) { - Matrix m = arg.matrix_value (); + FloatComplexMatrix m = arg.float_complex_matrix_value (); if (! 
error_state) { MatrixType mattyp = args(0).matrix_type (); - result = m.inverse (mattyp, info, rcond, 1); + result = m.inverse (mattyp, info, frcond, 1); args(0).matrix_type (mattyp); } } } - else if (arg.is_complex_type ()) + else { - if (arg.is_sparse_type ()) + if (arg.is_real_type ()) { - SparseComplexMatrix m = arg.sparse_complex_matrix_value (); - if (! error_state) + if (arg.is_sparse_type ()) + { + SparseMatrix m = arg.sparse_matrix_value (); + if (! error_state) + { + MatrixType mattyp = args(0).matrix_type (); + result = m.inverse (mattyp, info, rcond, 1); + args(0).matrix_type (mattyp); + } + } + else { - MatrixType mattyp = args(0).matrix_type (); - result = m.inverse (mattyp, info, rcond, 1); - args(0).matrix_type (mattyp); + Matrix m = arg.matrix_value (); + if (! error_state) + { + MatrixType mattyp = args(0).matrix_type (); + result = m.inverse (mattyp, info, rcond, 1); + args(0).matrix_type (mattyp); + } + } + } + else if (arg.is_complex_type ()) + { + if (arg.is_sparse_type ()) + { + SparseComplexMatrix m = arg.sparse_complex_matrix_value (); + if (! error_state) + { + MatrixType mattyp = args(0).matrix_type (); + result = m.inverse (mattyp, info, rcond, 1); + args(0).matrix_type (mattyp); + } + } + else + { + ComplexMatrix m = arg.complex_matrix_value (); + if (! error_state) + { + MatrixType mattyp = args(0).matrix_type (); + result = m.inverse (mattyp, info, rcond, 1); + args(0).matrix_type (mattyp); + } } } else - { - ComplexMatrix m = arg.complex_matrix_value (); - if (! error_state) - { - MatrixType mattyp = args(0).matrix_type (); - result = m.inverse (mattyp, info, rcond, 1); - args(0).matrix_type (mattyp); - } - } - - + gripe_wrong_type_arg ("inv", arg); } - else - gripe_wrong_type_arg ("inv", arg); - if (! 
error_state) { if (nargout > 1) - retval(1) = rcond; + if (isfloat) + retval(1) = frcond; + else + retval(1) = rcond; retval(0) = result; diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/kron.cc --- a/src/DLD-FUNCTIONS/kron.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/kron.cc Sun Apr 27 22:34:17 2008 +0200 @@ -40,6 +40,13 @@ extern void kron (const Array2&, const Array2&, Array2&); + +extern void +kron (const Array2&, const Array2&, Array2&); + +extern void +kron (const Array2&, const Array2&, + Array2&); #endif template @@ -69,6 +76,12 @@ template void kron (const Array2&, const Array2&, Array2&); +template void +kron (const Array2&, const Array2&, Array2&); + +template void +kron (const Array2&, const Array2&, + Array2&); #if !defined (CXX_NEW_FRIEND_TEMPLATE_DECL) extern void @@ -171,28 +184,58 @@ } else { - if (args(0).is_complex_type () || args(1).is_complex_type ()) + if (args(0).is_single_type () || args(1).is_single_type ()) { - ComplexMatrix a (args(0).complex_matrix_value()); - ComplexMatrix b (args(1).complex_matrix_value()); + if (args(0).is_complex_type () || args(1).is_complex_type ()) + { + FloatComplexMatrix a (args(0).float_complex_matrix_value()); + FloatComplexMatrix b (args(1).float_complex_matrix_value()); - if (! error_state) + if (! error_state) + { + FloatComplexMatrix c; + kron (a, b, c); + retval(0) = c; + } + } + else { - ComplexMatrix c; - kron (a, b, c); - retval(0) = c; + FloatMatrix a (args(0).float_matrix_value ()); + FloatMatrix b (args(1).float_matrix_value ()); + + if (! error_state) + { + FloatMatrix c; + kron (a, b, c); + retval (0) = c; + } } } else { - Matrix a (args(0).matrix_value ()); - Matrix b (args(1).matrix_value ()); + if (args(0).is_complex_type () || args(1).is_complex_type ()) + { + ComplexMatrix a (args(0).complex_matrix_value()); + ComplexMatrix b (args(1).complex_matrix_value()); - if (! error_state) + if (! 
error_state) + { + ComplexMatrix c; + kron (a, b, c); + retval(0) = c; + } + } + else { - Matrix c; - kron (a, b, c); - retval (0) = c; + Matrix a (args(0).matrix_value ()); + Matrix b (args(1).matrix_value ()); + + if (! error_state) + { + Matrix c; + kron (a, b, c); + retval (0) = c; + } } } } diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/lookup.cc --- a/src/DLD-FUNCTIONS/lookup.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/lookup.cc Sun Apr 27 22:34:17 2008 +0200 @@ -180,33 +180,64 @@ // in the case of a complex array, absolute values will be used for compatibility // (though it's not too meaningful). + ArrayN idx; - NDArray table = (argtable.is_complex_type ()) - ? argtable.complex_array_value ().abs () - : argtable.array_value (); + if (argtable.is_single_type () || argy.is_single_type ()) + { + FloatNDArray table = (argtable.is_complex_type ()) + ? argtable.float_complex_array_value ().abs () + : argtable.float_array_value (); - NDArray y = (argy.is_complex_type ()) - ? argy.complex_array_value ().abs () - : argy.array_value (); + FloatNDArray y = (argy.is_complex_type ()) + ? argy.float_complex_array_value ().abs () + : argy.float_array_value (); + + idx = ArrayN (y.dims ()); - ArrayN idx (y.dims ()); + // determine whether the array is descending. + bool desc = is_descending (table.data (), table.length ()); + octave_idx_type offset = left_inf ? 1 : 0; + octave_idx_type size = table.length () - offset - (right_inf ? 1 : 0); + if (size < 0) + size = 0; - // determine whether the array is descending. - bool desc = is_descending (table.data (), table.length ()); - octave_idx_type offset = left_inf ? 1 : 0; - octave_idx_type size = table.length () - offset - (right_inf ? 
1 : 0); - if (size < 0) - size = 0; + if (desc) + seq_lookup (table.data (), offset, size, + y.data (), y.length (), idx.fortran_vec (), + std::greater ()); + else + seq_lookup (table.data (), offset, size, + y.data (), y.length (), idx.fortran_vec (), + std::less ()); + } + else + { + NDArray table = (argtable.is_complex_type ()) + ? argtable.complex_array_value ().abs () + : argtable.array_value (); + + NDArray y = (argy.is_complex_type ()) + ? argy.complex_array_value ().abs () + : argy.array_value (); - if (desc) - seq_lookup (table.data (), offset, size, - y.data (), y.length (), idx.fortran_vec (), - std::greater ()); - else - seq_lookup (table.data (), offset, size, - y.data (), y.length (), idx.fortran_vec (), - std::less ()); + idx = ArrayN (y.dims ()); + + // determine whether the array is descending. + bool desc = is_descending (table.data (), table.length ()); + octave_idx_type offset = left_inf ? 1 : 0; + octave_idx_type size = table.length () - offset - (right_inf ? 1 : 0); + if (size < 0) + size = 0; + if (desc) + seq_lookup (table.data (), offset, size, + y.data (), y.length (), idx.fortran_vec (), + std::greater ()); + else + seq_lookup (table.data (), offset, size, + y.data (), y.length (), idx.fortran_vec (), + std::less ()); + } //retval(0) = idx; assign (retval(0), idx); diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/lu.cc --- a/src/DLD-FUNCTIONS/lu.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/lu.cc Sun Apr 27 22:34:17 2008 +0200 @@ -27,6 +27,8 @@ #include "CmplxLU.h" #include "dbleLU.h" +#include "fCmplxLU.h" +#include "floatLU.h" #include "SparseCmplxLU.h" #include "SparsedbleLU.h" @@ -337,77 +339,159 @@ if (arg.is_real_type ()) { - Matrix m = arg.matrix_value (); + if (arg.is_single_type ()) + { + FloatMatrix m = arg.float_matrix_value (); - if (! error_state) - { - LU fact (m); + if (! 
error_state) + { + FloatLU fact (m); - switch (nargout) - { - case 0: - case 1: - retval(0) = fact.Y (); - break; + switch (nargout) + { + case 0: + case 1: + retval(0) = fact.Y (); + break; + + case 2: + { + FloatMatrix P = fact.P (); + FloatMatrix L = P.transpose () * fact.L (); + retval(1) = fact.U (); + retval(0) = L; + } + break; - case 2: - { - Matrix P = fact.P (); - Matrix L = P.transpose () * fact.L (); - retval(1) = fact.U (); - retval(0) = L; - } - break; + case 3: + default: + { + if (vecout) + retval(2) = fact.P_vec (); + else + retval(2) = fact.P (); + retval(1) = fact.U (); + retval(0) = fact.L (); + } + break; + } + } + } + else + { + Matrix m = arg.matrix_value (); + + if (! error_state) + { + LU fact (m); - case 3: - default: - { - if (vecout) - retval(2) = fact.P_vec (); - else - retval(2) = fact.P (); - retval(1) = fact.U (); - retval(0) = fact.L (); - } - break; + switch (nargout) + { + case 0: + case 1: + retval(0) = fact.Y (); + break; + + case 2: + { + Matrix P = fact.P (); + Matrix L = P.transpose () * fact.L (); + retval(1) = fact.U (); + retval(0) = L; + } + break; + + case 3: + default: + { + if (vecout) + retval(2) = fact.P_vec (); + else + retval(2) = fact.P (); + retval(1) = fact.U (); + retval(0) = fact.L (); + } + break; + } } } } else if (arg.is_complex_type ()) { - ComplexMatrix m = arg.complex_matrix_value (); + if (arg.is_single_type ()) + { + FloatComplexMatrix m = arg.float_complex_matrix_value (); - if (! error_state) - { - ComplexLU fact (m); + if (! 
error_state) + { + FloatComplexLU fact (m); - switch (nargout) - { - case 0: - case 1: - retval(0) = fact.Y (); - break; + switch (nargout) + { + case 0: + case 1: + retval(0) = fact.Y (); + break; + + case 2: + { + FloatMatrix P = fact.P (); + FloatComplexMatrix L = P.transpose () * fact.L (); + retval(1) = fact.U (); + retval(0) = L; + } + break; - case 2: - { - Matrix P = fact.P (); - ComplexMatrix L = P.transpose () * fact.L (); - retval(1) = fact.U (); - retval(0) = L; - } - break; + case 3: + default: + { + if (vecout) + retval(2) = fact.P_vec (); + else + retval(2) = fact.P (); + retval(1) = fact.U (); + retval(0) = fact.L (); + } + break; + } + } + } + else + { + ComplexMatrix m = arg.complex_matrix_value (); + + if (! error_state) + { + ComplexLU fact (m); - case 3: - default: - { - if (vecout) - retval(2) = fact.P_vec (); - else - retval(2) = fact.P (); - retval(1) = fact.U (); - retval(0) = fact.L (); - } - break; + switch (nargout) + { + case 0: + case 1: + retval(0) = fact.Y (); + break; + + case 2: + { + Matrix P = fact.P (); + ComplexMatrix L = P.transpose () * fact.L (); + retval(1) = fact.U (); + retval(0) = L; + } + break; + + case 3: + default: + { + if (vecout) + retval(2) = fact.P_vec (); + else + retval(2) = fact.P (); + retval(1) = fact.U (); + retval(0) = fact.L (); + } + break; + } } } } diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/matrix_type.cc --- a/src/DLD-FUNCTIONS/matrix_type.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/matrix_type.cc Sun Apr 27 22:34:17 2008 +0200 @@ -321,11 +321,23 @@ if (mattyp.is_unknown ()) { - ComplexMatrix m = args(0).complex_matrix_value (); - if (!error_state) + if (args(0).is_single_type ()) { - mattyp = MatrixType (m); - args(0).matrix_type (mattyp); + FloatComplexMatrix m = args(0).float_complex_matrix_value (); + if (!error_state) + { + mattyp = MatrixType (m); + args(0).matrix_type (mattyp); + } + } + else + { + ComplexMatrix m = args(0).complex_matrix_value (); + if (!error_state) 
+ { + mattyp = MatrixType (m); + args(0).matrix_type (mattyp); + } } } } @@ -335,11 +347,23 @@ if (mattyp.is_unknown ()) { - Matrix m = args(0).matrix_value (); - if (!error_state) + if (args(0).is_single_type ()) { - mattyp = MatrixType (m); - args(0).matrix_type (mattyp); + FloatMatrix m = args(0).float_matrix_value (); + if (!error_state) + { + mattyp = MatrixType (m); + args(0).matrix_type (mattyp); + } + } + else + { + Matrix m = args(0).matrix_value (); + if (!error_state) + { + mattyp = MatrixType (m); + args(0).matrix_type (mattyp); + } } } } @@ -440,13 +464,28 @@ if (! error_state) { // Set the matrix type - if (args(0).is_complex_type()) - retval = - octave_value (args(0).complex_matrix_value (), - mattyp); + if (args(0).is_single_type ()) + { + if (args(0).is_complex_type()) + retval = octave_value + (args(0).float_complex_matrix_value (), + mattyp); + else + retval = octave_value + (args(0).float_matrix_value (), + mattyp); + } else - retval = octave_value (args(0).matrix_value (), - mattyp); + { + if (args(0).is_complex_type()) + retval = octave_value + (args(0).complex_matrix_value (), + mattyp); + else + retval = octave_value + (args(0).matrix_value (), + mattyp); + } } } } @@ -461,7 +500,7 @@ ## FIXME ## Disable tests for lower under-determined and upper over-determined -## matrices and this detection is disabled in MatrixType due to issues +## matrices as this detection is disabled in MatrixType due to issues ## of non minimum norm solution being found. 
%!assert(matrix_type(speye(10,10)),"Diagonal"); diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/max.cc --- a/src/DLD-FUNCTIONS/max.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/max.cc Sun Apr 27 22:34:17 2008 +0200 @@ -217,6 +217,183 @@ } \ } +#define MINMAX_SINGLE_BODY(FCN) \ +{ \ + bool single_arg = (nargin == 1) || (arg2.is_empty() && nargin == 3); \ + \ + if (single_arg && (nargout == 1 || nargout == 0)) \ + { \ + if (arg1.is_real_type ()) \ + { \ + FloatNDArray m = arg1.float_array_value (); \ + \ + if (! error_state) \ + { \ + FloatNDArray n = m. FCN (dim); \ + retval(0) = n; \ + } \ + } \ + else if (arg1.is_complex_type ()) \ + { \ + FloatComplexNDArray m = arg1.float_complex_array_value (); \ + \ + if (! error_state) \ + { \ + FloatComplexNDArray n = m. FCN (dim); \ + retval(0) = n; \ + } \ + } \ + else \ + gripe_wrong_type_arg (#FCN, arg1); \ + } \ + else if (single_arg && nargout == 2) \ + { \ + ArrayN index; \ + \ + if (arg1.is_real_type ()) \ + { \ + FloatNDArray m = arg1.float_array_value (); \ + \ + if (! error_state) \ + { \ + FloatNDArray n = m. FCN (index, dim); \ + retval(0) = n; \ + } \ + } \ + else if (arg1.is_complex_type ()) \ + { \ + FloatComplexNDArray m = arg1.float_complex_array_value (); \ + \ + if (! error_state) \ + { \ + FloatComplexNDArray n = m. FCN (index, dim); \ + retval(0) = n; \ + } \ + } \ + else \ + gripe_wrong_type_arg (#FCN, arg1); \ + \ + octave_idx_type len = index.numel (); \ + \ + if (len > 0) \ + { \ + float nan_val = lo_ieee_nan_value (); \ + \ + FloatNDArray idx (index.dims ()); \ + \ + for (octave_idx_type i = 0; i < len; i++) \ + { \ + OCTAVE_QUIT; \ + octave_idx_type tmp = index.elem (i) + 1; \ + idx.elem (i) = (tmp <= 0) \ + ? 
nan_val : static_cast (tmp); \ + } \ + \ + retval(1) = idx; \ + } \ + else \ + retval(1) = FloatNDArray (); \ + } \ + else \ + { \ + int arg1_is_scalar = arg1.is_scalar_type (); \ + int arg2_is_scalar = arg2.is_scalar_type (); \ + \ + int arg1_is_complex = arg1.is_complex_type (); \ + int arg2_is_complex = arg2.is_complex_type (); \ + \ + if (arg1_is_scalar) \ + { \ + if (arg1_is_complex || arg2_is_complex) \ + { \ + FloatComplex c1 = arg1.float_complex_value (); \ + FloatComplexNDArray m2 = arg2.float_complex_array_value (); \ + if (! error_state) \ + { \ + FloatComplexNDArray result = FCN (c1, m2); \ + if (! error_state) \ + retval(0) = result; \ + } \ + } \ + else \ + { \ + float d1 = arg1.float_value (); \ + FloatNDArray m2 = arg2.float_array_value (); \ + \ + if (! error_state) \ + { \ + FloatNDArray result = FCN (d1, m2); \ + if (! error_state) \ + retval(0) = result; \ + } \ + } \ + } \ + else if (arg2_is_scalar) \ + { \ + if (arg1_is_complex || arg2_is_complex) \ + { \ + FloatComplexNDArray m1 = arg1.float_complex_array_value (); \ + \ + if (! error_state) \ + { \ + FloatComplex c2 = arg2.float_complex_value (); \ + FloatComplexNDArray result = FCN (m1, c2); \ + if (! error_state) \ + retval(0) = result; \ + } \ + } \ + else \ + { \ + FloatNDArray m1 = arg1.float_array_value (); \ + \ + if (! error_state) \ + { \ + float d2 = arg2.float_value (); \ + FloatNDArray result = FCN (m1, d2); \ + if (! error_state) \ + retval(0) = result; \ + } \ + } \ + } \ + else \ + { \ + if (arg1_is_complex || arg2_is_complex) \ + { \ + FloatComplexNDArray m1 = arg1.float_complex_array_value (); \ + \ + if (! error_state) \ + { \ + FloatComplexNDArray m2 = arg2.float_complex_array_value (); \ + \ + if (! error_state) \ + { \ + FloatComplexNDArray result = FCN (m1, m2); \ + if (! error_state) \ + retval(0) = result; \ + } \ + } \ + } \ + else \ + { \ + FloatNDArray m1 = arg1.float_array_value (); \ + \ + if (! 
error_state) \ + { \ + FloatNDArray m2 = arg2.float_array_value (); \ + \ + if (! error_state) \ + { \ + FloatNDArray result = FCN (m1, m2); \ + if (! error_state) \ + retval(0) = result; \ + } \ + } \ + } \ + } \ + } \ +} + + #define MINMAX_INT_BODY(FCN, TYP) \ { \ bool single_arg = (nargin == 1) || (arg2.is_empty() && nargin == 3); \ @@ -541,6 +718,8 @@ } \ else if (arg1.is_sparse_type ()) \ MINMAX_SPARSE_BODY (FCN) \ + else if (arg1.is_single_type ()) \ + MINMAX_SINGLE_BODY (FCN) \ else \ MINMAX_DOUBLE_BODY (FCN) \ \ diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/pinv.cc --- a/src/DLD-FUNCTIONS/pinv.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/pinv.cc Sun Apr 27 22:34:17 2008 +0200 @@ -58,19 +58,6 @@ octave_value arg = args(0); - double tol = 0.0; - if (nargin == 2) - tol = args(1).double_value (); - - if (error_state) - return retval; - - if (tol < 0.0) - { - error ("pinv: tol must be greater than zero"); - return retval; - } - int arg_is_empty = empty_arg ("pinv", arg.rows (), arg.columns ()); if (arg_is_empty < 0) @@ -78,23 +65,73 @@ else if (arg_is_empty > 0) return octave_value (Matrix ()); - if (arg.is_real_type ()) + if (arg.is_single_type ()) { - Matrix m = arg.matrix_value (); + float tol = 0.0; + if (nargin == 2) + tol = args(1).float_value (); + + if (error_state) + return retval; + + if (tol < 0.0) + { + error ("pinv: tol must be greater than zero"); + return retval; + } - if (! error_state) - retval = m.pseudo_inverse (tol); - } - else if (arg.is_complex_type ()) - { - ComplexMatrix m = arg.complex_matrix_value (); + if (arg.is_real_type ()) + { + FloatMatrix m = arg.float_matrix_value (); - if (! error_state) - retval = m.pseudo_inverse (tol); + if (! error_state) + retval = m.pseudo_inverse (tol); + } + else if (arg.is_complex_type ()) + { + FloatComplexMatrix m = arg.float_complex_matrix_value (); + + if (! 
error_state) + retval = m.pseudo_inverse (tol); + } + else + { + gripe_wrong_type_arg ("pinv", arg); + } } else { - gripe_wrong_type_arg ("pinv", arg); + double tol = 0.0; + if (nargin == 2) + tol = args(1).double_value (); + + if (error_state) + return retval; + + if (tol < 0.0) + { + error ("pinv: tol must be greater than zero"); + return retval; + } + + if (arg.is_real_type ()) + { + Matrix m = arg.matrix_value (); + + if (! error_state) + retval = m.pseudo_inverse (tol); + } + else if (arg.is_complex_type ()) + { + ComplexMatrix m = arg.complex_matrix_value (); + + if (! error_state) + retval = m.pseudo_inverse (tol); + } + else + { + gripe_wrong_type_arg ("pinv", arg); + } } return retval; diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/qr.cc --- a/src/DLD-FUNCTIONS/qr.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/qr.cc Sun Apr 27 22:34:17 2008 +0200 @@ -32,6 +32,10 @@ #include "CmplxQRP.h" #include "dbleQR.h" #include "dbleQRP.h" +#include "fCmplxQR.h" +#include "fCmplxQRP.h" +#include "floatQR.h" +#include "floatQRP.h" #include "SparseQR.h" #include "SparseCmplxQR.h" @@ -275,78 +279,154 @@ QR::type type = (nargout == 0 || nargout == 1) ? QR::raw : (nargin == 2 ? QR::economy : QR::std); - if (arg.is_real_type ()) + if (arg.is_single_type ()) { - Matrix m = arg.matrix_value (); + if (arg.is_real_type ()) + { + FloatMatrix m = arg.float_matrix_value (); - if (! error_state) - { - switch (nargout) + if (! 
error_state) { - case 0: - case 1: - { - QR fact (m, type); - retval(0) = fact.R (); - } - break; + switch (nargout) + { + case 0: + case 1: + { + FloatQR fact (m, type); + retval(0) = fact.R (); + } + break; - case 2: - { - QR fact (m, type); - retval(1) = fact.R (); - retval(0) = fact.Q (); - } - break; + case 2: + { + FloatQR fact (m, type); + retval(1) = fact.R (); + retval(0) = fact.Q (); + } + break; - default: - { - QRP fact (m, type); - retval(2) = fact.P (); - retval(1) = fact.R (); - retval(0) = fact.Q (); - } - break; + default: + { + FloatQRP fact (m, type); + retval(2) = fact.P (); + retval(1) = fact.R (); + retval(0) = fact.Q (); + } + break; + } } } - } - else if (arg.is_complex_type ()) - { - ComplexMatrix m = arg.complex_matrix_value (); - - if (! error_state) + else if (arg.is_complex_type ()) { - switch (nargout) + FloatComplexMatrix m = arg.float_complex_matrix_value (); + + if (! error_state) { - case 0: - case 1: - { - ComplexQR fact (m, type); - retval(0) = fact.R (); - } - break; + switch (nargout) + { + case 0: + case 1: + { + FloatComplexQR fact (m, type); + retval(0) = fact.R (); + } + break; - case 2: - { - ComplexQR fact (m, type); - retval(1) = fact.R (); - retval(0) = fact.Q (); - } - break; + case 2: + { + FloatComplexQR fact (m, type); + retval(1) = fact.R (); + retval(0) = fact.Q (); + } + break; - default: - { - ComplexQRP fact (m, type); - retval(2) = fact.P (); - retval(1) = fact.R (); - retval(0) = fact.Q (); - } - break; + default: + { + FloatComplexQRP fact (m, type); + retval(2) = fact.P (); + retval(1) = fact.R (); + retval(0) = fact.Q (); + } + break; + } } } } else - gripe_wrong_type_arg ("qr", arg); + { + if (arg.is_real_type ()) + { + Matrix m = arg.matrix_value (); + + if (! 
error_state) + { + switch (nargout) + { + case 0: + case 1: + { + QR fact (m, type); + retval(0) = fact.R (); + } + break; + + case 2: + { + QR fact (m, type); + retval(1) = fact.R (); + retval(0) = fact.Q (); + } + break; + + default: + { + QRP fact (m, type); + retval(2) = fact.P (); + retval(1) = fact.R (); + retval(0) = fact.Q (); + } + break; + } + } + } + else if (arg.is_complex_type ()) + { + ComplexMatrix m = arg.complex_matrix_value (); + + if (! error_state) + { + switch (nargout) + { + case 0: + case 1: + { + ComplexQR fact (m, type); + retval(0) = fact.R (); + } + break; + + case 2: + { + ComplexQR fact (m, type); + retval(1) = fact.R (); + retval(0) = fact.Q (); + } + break; + + default: + { + ComplexQRP fact (m, type); + retval(2) = fact.P (); + retval(1) = fact.R (); + retval(0) = fact.Q (); + } + break; + } + } + } + else + gripe_wrong_type_arg ("qr", arg); + } } return retval; diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/schur.cc --- a/src/DLD-FUNCTIONS/schur.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/schur.cc Sun Apr 27 22:34:17 2008 +0200 @@ -29,6 +29,8 @@ #include "CmplxSCHUR.h" #include "dbleSCHUR.h" +#include "fCmplxSCHUR.h" +#include "floatSCHUR.h" #include "defun-dld.h" #include "error.h" @@ -295,48 +297,93 @@ return retval; } - if (arg.is_real_type ()) + if (arg.is_single_type ()) { - Matrix tmp = arg.matrix_value (); + if (arg.is_real_type ()) + { + FloatMatrix tmp = arg.float_matrix_value (); - if (! error_state) - { - if (nargout == 0 || nargout == 1) + if (! 
error_state) { - SCHUR result (tmp, ord, false); - retval(0) = result.schur_matrix (); + if (nargout == 0 || nargout == 1) + { + FloatSCHUR result (tmp, ord, false); + retval(0) = result.schur_matrix (); + } + else + { + FloatSCHUR result (tmp, ord, true); + retval(1) = result.schur_matrix (); + retval(0) = result.unitary_matrix (); + } } - else + } + else if (arg.is_complex_type ()) + { + FloatComplexMatrix ctmp = arg.float_complex_matrix_value (); + + if (! error_state) { - SCHUR result (tmp, ord, true); - retval(1) = result.schur_matrix (); - retval(0) = result.unitary_matrix (); + + if (nargout == 0 || nargout == 1) + { + FloatComplexSCHUR result (ctmp, ord, false); + retval(0) = result.schur_matrix (); + } + else + { + FloatComplexSCHUR result (ctmp, ord, true); + retval(1) = result.schur_matrix (); + retval(0) = result.unitary_matrix (); + } } } } - else if (arg.is_complex_type ()) + else { - ComplexMatrix ctmp = arg.complex_matrix_value (); + if (arg.is_real_type ()) + { + Matrix tmp = arg.matrix_value (); - if (! error_state) - { - - if (nargout == 0 || nargout == 1) + if (! error_state) { - ComplexSCHUR result (ctmp, ord, false); - retval(0) = result.schur_matrix (); - } - else - { - ComplexSCHUR result (ctmp, ord, true); - retval(1) = result.schur_matrix (); - retval(0) = result.unitary_matrix (); + if (nargout == 0 || nargout == 1) + { + SCHUR result (tmp, ord, false); + retval(0) = result.schur_matrix (); + } + else + { + SCHUR result (tmp, ord, true); + retval(1) = result.schur_matrix (); + retval(0) = result.unitary_matrix (); + } } } - } - else - { - gripe_wrong_type_arg ("schur", arg); + else if (arg.is_complex_type ()) + { + ComplexMatrix ctmp = arg.complex_matrix_value (); + + if (! 
error_state) + { + + if (nargout == 0 || nargout == 1) + { + ComplexSCHUR result (ctmp, ord, false); + retval(0) = result.schur_matrix (); + } + else + { + ComplexSCHUR result (ctmp, ord, true); + retval(1) = result.schur_matrix (); + retval(0) = result.unitary_matrix (); + } + } + } + else + { + gripe_wrong_type_arg ("schur", arg); + } } return retval; diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/sqrtm.cc --- a/src/DLD-FUNCTIONS/sqrtm.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/sqrtm.cc Sun Apr 27 22:34:17 2008 +0200 @@ -27,6 +27,7 @@ #include #include "CmplxSCHUR.h" +#include "fCmplxSCHUR.h" #include "lo-ieee.h" #include "lo-mappers.h" @@ -35,14 +36,16 @@ #include "gripes.h" #include "utils.h" -static inline double -getmin (double x, double y) +template +static inline T +getmin (T x, T y) { return x < y ? x : y; } -static inline double -getmax (double x, double y) +template +static inline T +getmax (T x, T y) { return x > y ? x : y; } @@ -70,6 +73,28 @@ return sqrt (sum); } +static float +frobnorm (const FloatComplexMatrix& A) +{ + float sum = 0; + + for (octave_idx_type i = 0; i < A.rows (); i++) + for (octave_idx_type j = 0; j < A.columns (); j++) + sum += real (A(i,j) * conj (A(i,j))); + + return sqrt (sum); +} + +static float +frobnorm (const FloatMatrix& A) +{ + float sum = 0; + for (octave_idx_type i = 0; i < A.rows (); i++) + for (octave_idx_type j = 0; j < A.columns (); j++) + sum += A(i,j) * A(i,j); + + return sqrt (sum); +} static ComplexMatrix sqrtm_from_schur (const ComplexMatrix& U, const ComplexMatrix& T) @@ -108,6 +133,43 @@ return U * R * U.hermitian (); } +static FloatComplexMatrix +sqrtm_from_schur (const FloatComplexMatrix& U, const FloatComplexMatrix& T) +{ + const octave_idx_type n = U.rows (); + + FloatComplexMatrix R (n, n, 0.0); + + for (octave_idx_type j = 0; j < n; j++) + R(j,j) = sqrt (T(j,j)); + + const float fudge = sqrt (FLT_MIN); + + for (octave_idx_type p = 0; p < n-1; p++) + { + for (octave_idx_type i = 0; 
i < n-(p+1); i++) + { + const octave_idx_type j = i + p + 1; + + FloatComplex s = T(i,j); + + for (octave_idx_type k = i+1; k < j; k++) + s -= R(i,k) * R(k,j); + + // dividing + // R(i,j) = s/(R(i,i)+R(j,j)); + // screwing around to not / 0 + + const FloatComplex d = R(i,i) + R(j,j) + fudge; + const FloatComplex conjd = conj (d); + + R(i,j) = (s*conjd)/(d*conjd); + } + } + + return U * R * U.hermitian (); +} + DEFUN_DLD (sqrtm, args, nargout, "-*- texinfo -*-\n\ @deftypefn {Loadable Function} {[@var{result}, @var{error_estimate}] =} sqrtm (@var{a})\n\ @@ -150,125 +212,249 @@ retval(1) = lo_ieee_inf_value (); retval(0) = lo_ieee_nan_value (); - if (arg.is_real_scalar ()) + + if (arg.is_single_type ()) { - double d = arg.double_value (); - if (d > 0.0) + if (arg.is_real_scalar ()) { - retval(0) = sqrt (d); - retval(1) = 0.0; + float d = arg.float_value (); + if (d > 0.0) + { + retval(0) = sqrt (d); + retval(1) = 0.0; + } + else + { + retval(0) = FloatComplex (0.0, sqrt (d)); + retval(1) = 0.0; + } } - else + else if (arg.is_complex_scalar ()) { - retval(0) = Complex (0.0, sqrt (d)); + FloatComplex c = arg.float_complex_value (); + retval(0) = sqrt (c); retval(1) = 0.0; } - } - else if (arg.is_complex_scalar ()) - { - Complex c = arg.complex_value (); - retval(0) = sqrt (c); - retval(1) = 0.0; - } - else if (arg.is_matrix_type ()) - { - double err, minT; + else if (arg.is_matrix_type ()) + { + float err, minT; + + if (arg.is_real_matrix ()) + { + FloatMatrix A = arg.float_matrix_value(); - if (arg.is_real_matrix ()) - { - Matrix A = arg.matrix_value(); + if (error_state) + return retval; - if (error_state) - return retval; + // FIXME -- eventually, FloatComplexSCHUR will accept a + // real matrix arg. - // FIXME -- eventually, ComplexSCHUR will accept a - // real matrix arg. 
+ FloatComplexMatrix Ac (A); - ComplexMatrix Ac (A); + const FloatComplexSCHUR schur (Ac, std::string ()); - const ComplexSCHUR schur (Ac, std::string ()); + if (error_state) + return retval; - if (error_state) - return retval; - - const ComplexMatrix U (schur.unitary_matrix ()); - const ComplexMatrix T (schur.schur_matrix ()); - const ComplexMatrix X (sqrtm_from_schur (U, T)); + const FloatComplexMatrix U (schur.unitary_matrix ()); + const FloatComplexMatrix T (schur.schur_matrix ()); + const FloatComplexMatrix X (sqrtm_from_schur (U, T)); - // Check for minimal imaginary part - double normX = 0.0; - double imagX = 0.0; - for (octave_idx_type i = 0; i < n; i++) - for (octave_idx_type j = 0; j < n; j++) - { - imagX = getmax (imagX, imag (X(i,j))); - normX = getmax (normX, abs (X(i,j))); - } + // Check for minimal imaginary part + float normX = 0.0; + float imagX = 0.0; + for (octave_idx_type i = 0; i < n; i++) + for (octave_idx_type j = 0; j < n; j++) + { + imagX = getmax (imagX, imag (X(i,j))); + normX = getmax (normX, abs (X(i,j))); + } - if (imagX < normX * 100 * DBL_EPSILON) - retval(0) = real (X); - else - retval(0) = X; + if (imagX < normX * 100 * DBL_EPSILON) + retval(0) = real (X); + else + retval(0) = X; - // Compute error - // FIXME can we estimate the error without doing the - // matrix multiply? + // Compute error + // FIXME can we estimate the error without doing the + // matrix multiply? 
+ + err = frobnorm (X*X - FloatComplexMatrix (A)) / frobnorm (A); - err = frobnorm (X*X - ComplexMatrix (A)) / frobnorm (A); + if (xisnan (err)) + err = lo_ieee_float_inf_value (); - if (xisnan (err)) - err = lo_ieee_inf_value (); + // Find min diagonal + minT = lo_ieee_float_inf_value (); + for (octave_idx_type i=0; i < n; i++) + minT = getmin(minT, abs(T(i,i))); + } + else + { + FloatComplexMatrix A = arg.float_complex_matrix_value (); - // Find min diagonal - minT = lo_ieee_inf_value (); - for (octave_idx_type i=0; i < n; i++) - minT = getmin(minT, abs(T(i,i))); - } - else - { - ComplexMatrix A = arg.complex_matrix_value (); + if (error_state) + return retval; + + const FloatComplexSCHUR schur (A, std::string ()); - if (error_state) - return retval; + if (error_state) + return retval; - const ComplexSCHUR schur (A, std::string ()); - - if (error_state) - return retval; + const FloatComplexMatrix U (schur.unitary_matrix ()); + const FloatComplexMatrix T (schur.schur_matrix ()); + const FloatComplexMatrix X (sqrtm_from_schur (U, T)); - const ComplexMatrix U (schur.unitary_matrix ()); - const ComplexMatrix T (schur.schur_matrix ()); - const ComplexMatrix X (sqrtm_from_schur (U, T)); + retval(0) = X; + + err = frobnorm (X*X - A) / frobnorm (A); - retval(0) = X; + if (xisnan (err)) + err = lo_ieee_float_inf_value (); - err = frobnorm (X*X - A) / frobnorm (A); - - if (xisnan (err)) - err = lo_ieee_inf_value (); + minT = lo_ieee_float_inf_value (); + for (octave_idx_type i = 0; i < n; i++) + minT = getmin (minT, abs (T(i,i))); + } - minT = lo_ieee_inf_value (); - for (octave_idx_type i = 0; i < n; i++) - minT = getmin (minT, abs (T(i,i))); - } - - retval(1) = err; + retval(1) = err; - if (nargout < 2) - { - if (err > 100*(minT+DBL_EPSILON)*n) + if (nargout < 2) { - if (minT == 0.0) - error ("sqrtm: A is singular, sqrt may not exist"); - else if (minT <= sqrt (DBL_MIN)) - error ("sqrtm: A is nearly singular, failed to find sqrt"); - else - error ("sqrtm: failed to find 
sqrt"); + if (err > 100*(minT+DBL_EPSILON)*n) + { + if (minT == 0.0) + error ("sqrtm: A is singular, sqrt may not exist"); + else if (minT <= sqrt (DBL_MIN)) + error ("sqrtm: A is nearly singular, failed to find sqrt"); + else + error ("sqrtm: failed to find sqrt"); + } } } } else - gripe_wrong_type_arg ("sqrtm", arg); + { + if (arg.is_real_scalar ()) + { + double d = arg.double_value (); + if (d > 0.0) + { + retval(0) = sqrt (d); + retval(1) = 0.0; + } + else + { + retval(0) = Complex (0.0, sqrt (d)); + retval(1) = 0.0; + } + } + else if (arg.is_complex_scalar ()) + { + Complex c = arg.complex_value (); + retval(0) = sqrt (c); + retval(1) = 0.0; + } + else if (arg.is_matrix_type ()) + { + double err, minT; + + if (arg.is_real_matrix ()) + { + Matrix A = arg.matrix_value(); + + if (error_state) + return retval; + + // FIXME -- eventually, ComplexSCHUR will accept a + // real matrix arg. + + ComplexMatrix Ac (A); + + const ComplexSCHUR schur (Ac, std::string ()); + + if (error_state) + return retval; + + const ComplexMatrix U (schur.unitary_matrix ()); + const ComplexMatrix T (schur.schur_matrix ()); + const ComplexMatrix X (sqrtm_from_schur (U, T)); + + // Check for minimal imaginary part + double normX = 0.0; + double imagX = 0.0; + for (octave_idx_type i = 0; i < n; i++) + for (octave_idx_type j = 0; j < n; j++) + { + imagX = getmax (imagX, imag (X(i,j))); + normX = getmax (normX, abs (X(i,j))); + } + + if (imagX < normX * 100 * DBL_EPSILON) + retval(0) = real (X); + else + retval(0) = X; + + // Compute error + // FIXME can we estimate the error without doing the + // matrix multiply? 
+ + err = frobnorm (X*X - ComplexMatrix (A)) / frobnorm (A); + + if (xisnan (err)) + err = lo_ieee_inf_value (); + + // Find min diagonal + minT = lo_ieee_inf_value (); + for (octave_idx_type i=0; i < n; i++) + minT = getmin(minT, abs(T(i,i))); + } + else + { + ComplexMatrix A = arg.complex_matrix_value (); + + if (error_state) + return retval; + + const ComplexSCHUR schur (A, std::string ()); + + if (error_state) + return retval; + + const ComplexMatrix U (schur.unitary_matrix ()); + const ComplexMatrix T (schur.schur_matrix ()); + const ComplexMatrix X (sqrtm_from_schur (U, T)); + + retval(0) = X; + + err = frobnorm (X*X - A) / frobnorm (A); + + if (xisnan (err)) + err = lo_ieee_inf_value (); + + minT = lo_ieee_inf_value (); + for (octave_idx_type i = 0; i < n; i++) + minT = getmin (minT, abs (T(i,i))); + } + + retval(1) = err; + + if (nargout < 2) + { + if (err > 100*(minT+DBL_EPSILON)*n) + { + if (minT == 0.0) + error ("sqrtm: A is singular, sqrt may not exist"); + else if (minT <= sqrt (DBL_MIN)) + error ("sqrtm: A is nearly singular, failed to find sqrt"); + else + error ("sqrtm: failed to find sqrt"); + } + } + } + else + gripe_wrong_type_arg ("sqrtm", arg); + } return retval; } diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/svd.cc --- a/src/DLD-FUNCTIONS/svd.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/svd.cc Sun Apr 27 22:34:17 2008 +0200 @@ -27,6 +27,8 @@ #include "CmplxSVD.h" #include "dbleSVD.h" +#include "fCmplxSVD.h" +#include "floatSVD.h" #include "defun-dld.h" #include "error.h" @@ -132,16 +134,32 @@ octave_idx_type nr = arg.rows (); octave_idx_type nc = arg.columns (); + bool isfloat = arg.is_single_type (); + if (nr == 0 || nc == 0) { - if (nargout == 3) + if (isfloat) { - retval(3) = identity_matrix (nr, nr); - retval(2) = Matrix (nr, nc); - retval(1) = identity_matrix (nc, nc); + if (nargout == 3) + { + retval(3) = float_identity_matrix (nr, nr); + retval(2) = FloatMatrix (nr, nc); + retval(1) = float_identity_matrix (nc, 
nc); + } + else + retval(0) = FloatMatrix (0, 1); } else - retval(0) = Matrix (0, 1); + { + if (nargout == 3) + { + retval(3) = identity_matrix (nr, nr); + retval(2) = Matrix (nr, nc); + retval(1) = identity_matrix (nc, nc); + } + else + retval(0) = Matrix (0, 1); + } } else { @@ -149,66 +167,128 @@ ? SVD::sigma_only : (nargin == 2) ? SVD::economy : SVD::std); - if (arg.is_real_type ()) + if (isfloat) { - Matrix tmp = arg.matrix_value (); - - if (! error_state) + if (arg.is_real_type ()) { - if (tmp.any_element_is_inf_or_nan ()) + FloatMatrix tmp = arg.float_matrix_value (); + + if (! error_state) { - error ("svd: cannot take SVD of matrix containing Inf or NaN values"); - return retval; - } + if (tmp.any_element_is_inf_or_nan ()) + { + error ("svd: cannot take SVD of matrix containing Inf or NaN values"); + return retval; + } - SVD result (tmp, type); + FloatSVD result (tmp, type); - DiagMatrix sigma = result.singular_values (); + FloatDiagMatrix sigma = result.singular_values (); - if (nargout == 0 || nargout == 1) - { - retval(0) = sigma.diag (); - } - else - { - retval(2) = result.right_singular_matrix (); - retval(1) = sigma; - retval(0) = result.left_singular_matrix (); + if (nargout == 0 || nargout == 1) + { + retval(0) = sigma.diag (); + } + else + { + retval(2) = result.right_singular_matrix (); + retval(1) = sigma; + retval(0) = result.left_singular_matrix (); + } } } - } - else if (arg.is_complex_type ()) - { - ComplexMatrix ctmp = arg.complex_matrix_value (); + else if (arg.is_complex_type ()) + { + FloatComplexMatrix ctmp = arg.float_complex_matrix_value (); - if (! error_state) - { - if (ctmp.any_element_is_inf_or_nan ()) + if (! 
error_state) { - error ("svd: cannot take SVD of matrix containing Inf or NaN values"); - return retval; - } + if (ctmp.any_element_is_inf_or_nan ()) + { + error ("svd: cannot take SVD of matrix containing Inf or NaN values"); + return retval; + } - ComplexSVD result (ctmp, type); + FloatComplexSVD result (ctmp, type); - DiagMatrix sigma = result.singular_values (); + FloatDiagMatrix sigma = result.singular_values (); - if (nargout == 0 || nargout == 1) - { - retval(0) = sigma.diag (); - } - else - { - retval(2) = result.right_singular_matrix (); - retval(1) = sigma; - retval(0) = result.left_singular_matrix (); + if (nargout == 0 || nargout == 1) + { + retval(0) = sigma.diag (); + } + else + { + retval(2) = result.right_singular_matrix (); + retval(1) = sigma; + retval(0) = result.left_singular_matrix (); + } } } } else { - gripe_wrong_type_arg ("svd", arg); - return retval; + if (arg.is_real_type ()) + { + Matrix tmp = arg.matrix_value (); + + if (! error_state) + { + if (tmp.any_element_is_inf_or_nan ()) + { + error ("svd: cannot take SVD of matrix containing Inf or NaN values"); + return retval; + } + + SVD result (tmp, type); + + DiagMatrix sigma = result.singular_values (); + + if (nargout == 0 || nargout == 1) + { + retval(0) = sigma.diag (); + } + else + { + retval(2) = result.right_singular_matrix (); + retval(1) = sigma; + retval(0) = result.left_singular_matrix (); + } + } + } + else if (arg.is_complex_type ()) + { + ComplexMatrix ctmp = arg.complex_matrix_value (); + + if (! 
error_state) + { + if (ctmp.any_element_is_inf_or_nan ()) + { + error ("svd: cannot take SVD of matrix containing Inf or NaN values"); + return retval; + } + + ComplexSVD result (ctmp, type); + + DiagMatrix sigma = result.singular_values (); + + if (nargout == 0 || nargout == 1) + { + retval(0) = sigma.diag (); + } + else + { + retval(2) = result.right_singular_matrix (); + retval(1) = sigma; + retval(0) = result.left_singular_matrix (); + } + } + } + else + { + gripe_wrong_type_arg ("svd", arg); + return retval; + } } } diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/syl.cc --- a/src/DLD-FUNCTIONS/syl.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/syl.cc Sun Apr 27 22:34:17 2008 +0200 @@ -87,8 +87,14 @@ int arg_b_is_empty = empty_arg ("syl", b_nr, b_nc); int arg_c_is_empty = empty_arg ("syl", c_nr, c_nc); + bool isfloat = arg_a.is_single_type () || arg_b.is_single_type () || + arg_c.is_single_type (); + if (arg_a_is_empty > 0 && arg_b_is_empty > 0 && arg_c_is_empty > 0) - return octave_value (Matrix ()); + if (isfloat) + return octave_value (FloatMatrix ()); + else + return octave_value (Matrix ()); else if (arg_a_is_empty || arg_b_is_empty || arg_c_is_empty) return retval; @@ -106,51 +112,100 @@ } // Dimensions look o.k., let's solve the problem. 
+ if (isfloat) + { + if (arg_a.is_complex_type () + || arg_b.is_complex_type () + || arg_c.is_complex_type ()) + { + // Do everything in complex arithmetic; - if (arg_a.is_complex_type () - || arg_b.is_complex_type () - || arg_c.is_complex_type ()) - { - // Do everything in complex arithmetic; + FloatComplexMatrix ca = arg_a.float_complex_matrix_value (); + + if (error_state) + return retval; - ComplexMatrix ca = arg_a.complex_matrix_value (); + FloatComplexMatrix cb = arg_b.float_complex_matrix_value (); + + if (error_state) + return retval; + + FloatComplexMatrix cc = arg_c.float_complex_matrix_value (); - if (error_state) - return retval; + if (error_state) + return retval; - ComplexMatrix cb = arg_b.complex_matrix_value (); + retval = Sylvester (ca, cb, cc); + } + else + { + // Do everything in real arithmetic. + + FloatMatrix ca = arg_a.float_matrix_value (); - if (error_state) - return retval; + if (error_state) + return retval; + + FloatMatrix cb = arg_b.float_matrix_value (); - ComplexMatrix cc = arg_c.complex_matrix_value (); + if (error_state) + return retval; - if (error_state) - return retval; + FloatMatrix cc = arg_c.float_matrix_value (); + + if (error_state) + return retval; - retval = Sylvester (ca, cb, cc); - } - else - { - // Do everything in real arithmetic. + retval = Sylvester (ca, cb, cc); + } + } + else + { + if (arg_a.is_complex_type () + || arg_b.is_complex_type () + || arg_c.is_complex_type ()) + { + // Do everything in complex arithmetic; - Matrix ca = arg_a.matrix_value (); + ComplexMatrix ca = arg_a.complex_matrix_value (); + + if (error_state) + return retval; - if (error_state) - return retval; + ComplexMatrix cb = arg_b.complex_matrix_value (); + + if (error_state) + return retval; + + ComplexMatrix cc = arg_c.complex_matrix_value (); - Matrix cb = arg_b.matrix_value (); + if (error_state) + return retval; - if (error_state) - return retval; + retval = Sylvester (ca, cb, cc); + } + else + { + // Do everything in real arithmetic. 
+ + Matrix ca = arg_a.matrix_value (); + + if (error_state) + return retval; - Matrix cc = arg_c.matrix_value (); + Matrix cb = arg_b.matrix_value (); + + if (error_state) + return retval; + + Matrix cc = arg_c.matrix_value (); - if (error_state) - return retval; + if (error_state) + return retval; - retval = Sylvester (ca, cb, cc); - } + retval = Sylvester (ca, cb, cc); + } + } return retval; } diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/symbfact.cc --- a/src/DLD-FUNCTIONS/symbfact.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/symbfact.cc Sun Apr 27 22:34:17 2008 +0200 @@ -163,7 +163,7 @@ A->x = a.data(); } else - gripe_wrong_type_arg ("symbfact", arg(0)); + gripe_wrong_type_arg ("symbfact", args(0)); octave_idx_type coletree = false; octave_idx_type n = A->nrow; diff -r 45f5faba05a2 -r 82be108cc558 src/DLD-FUNCTIONS/typecast.cc --- a/src/DLD-FUNCTIONS/typecast.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/DLD-FUNCTIONS/typecast.cc Sun Apr 27 22:34:17 2008 +0200 @@ -112,6 +112,12 @@ typecast (x, y); retval = octave_value (y); } + else if (type == "single") + { + FloatNDArray y; + typecast (x, y); + retval = octave_value (y); + } else { NDArray y; @@ -155,9 +161,7 @@ { std::transform (type.begin (), type.end (), type.begin (), tolower); - if (type == "single") - error ("typecast: type 'single' is not supported"); - else if (type != "uint8" && type != "uint16" + if (type != "uint8" && type != "uint16" && type != "uint32" && type != "uint64" && type != "int8" && type != "int16" && type != "int32" && type != "int64" @@ -200,6 +204,8 @@ retval = typecast (args(0).int32_array_value (), type); else if (args(0).is_int64_type ()) retval = typecast (args(0).int64_array_value (), type); + else if (args(0).is_single_type ()) + retval = typecast (args(0).float_array_value (), type); else retval = typecast (args(0).array_value (), type); } diff -r 45f5faba05a2 -r 82be108cc558 src/Makefile.in --- a/src/Makefile.in Wed May 14 18:09:56 2008 +0200 +++ 
b/src/Makefile.in Sun Apr 27 22:34:17 2008 +0200 @@ -103,6 +103,7 @@ ov-cell.h ov.h ov-fcn.h ov-builtin.h ov-dld-fcn.h \ ov-mex-fcn.h ov-usr-fcn.h ov-fcn-handle.h \ ov-fcn-inline.h ov-class.h ov-typeinfo.h ov-type-conv.h \ + ov-flt-re-mat.h ov-flt-cx-mat.h ov-float.h ov-flt-complex.h \ $(OV_INTTYPE_INC) OV_SPARSE_INCLUDES := \ @@ -146,14 +147,22 @@ op-sm-cs.cc op-sm-m.cc op-sm-s.cc op-sm-scm.cc op-sm-sm.cc \ op-s-scm.cc op-s-sm.cc -OP_XSRC := op-b-b.cc op-b-bm.cc op-bm-b.cc op-bm-bm.cc op-cell.cc \ - op-chm.cc op-class.cc op-cm-cm.cc op-cm-cs.cc op-cm-m.cc \ +DOUBLE_OP_XSRC := op-cm-cm.cc op-cm-cs.cc op-cm-m.cc \ op-cm-s.cc op-cs-cm.cc op-cs-cs.cc op-cs-m.cc \ - op-cs-s.cc op-list.cc op-m-cm.cc \ - op-m-cs.cc op-m-m.cc op-m-s.cc op-range.cc op-s-cm.cc \ - op-s-cs.cc op-s-m.cc op-s-s.cc op-str-m.cc \ + op-cs-s.cc op-m-cm.cc \ + op-m-cs.cc op-m-m.cc op-m-s.cc op-s-cm.cc \ + op-s-cs.cc op-s-m.cc op-s-s.cc + +FLOAT_OP_XSRC := op-fcm-fcm.cc op-fcm-fcs.cc op-fcm-fm.cc \ + op-fcm-fs.cc op-fcs-fcm.cc op-fcs-fcs.cc op-fcs-fm.cc \ + op-fcs-fs.cc op-fm-fcm.cc \ + op-fm-fcs.cc op-fm-fm.cc op-fm-fs.cc op-fs-fcm.cc \ + op-fs-fcs.cc op-fs-fm.cc op-fs-fs.cc + +OP_XSRC := op-b-b.cc op-b-bm.cc op-bm-b.cc op-bm-bm.cc op-cell.cc \ + op-chm.cc op-class.cc op-list.cc op-range.cc op-str-m.cc \ op-str-s.cc op-str-str.cc op-streamoff.cc op-struct.cc \ - $(INTTYPE_OP_XSRC) \ + $(DOUBLE_OP_XSRC) $(FLOAT_OP_XSRC) $(INTTYPE_OP_XSRC) \ $(SPARSE_OP_XSRC) OP_SRC := $(addprefix OPERATORS/, $(OP_XSRC)) @@ -175,6 +184,7 @@ ov.cc ov-fcn.cc ov-builtin.cc ov-dld-fcn.cc \ ov-mex-fcn.cc ov-usr-fcn.cc ov-fcn-handle.cc ov-fcn-inline.cc \ ov-class.cc ov-typeinfo.cc \ + ov-flt-re-mat.cc ov-flt-cx-mat.cc ov-float.cc ov-flt-complex.cc \ $(OV_INTTYPE_SRC) \ $(OV_SPARSE_SRC) diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-b-b.cc --- a/src/OPERATORS/op-b-b.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-b-b.cc Sun Apr 27 22:34:17 2008 +0200 @@ -31,6 +31,7 @@ #include "ov-bool.h" #include 
"ov-bool-mat.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-re-mat.h" #include "ov-typeinfo.h" #include "ops.h" @@ -68,6 +69,8 @@ DEFNDCATOP_FN (b_b, bool, bool, bool_array, bool_array, concat) DEFNDCATOP_FN (b_s, bool, scalar, array, array, concat) DEFNDCATOP_FN (s_b, scalar, bool, array, array, concat) +DEFNDCATOP_FN (b_f, bool, float_scalar, float_array, float_array, concat) +DEFNDCATOP_FN (f_b, float_scalar, bool, float_array, float_array, concat) void install_b_b_ops (void) @@ -86,6 +89,8 @@ INSTALL_CATOP (octave_bool, octave_bool, b_b); INSTALL_CATOP (octave_bool, octave_scalar, b_s); INSTALL_CATOP (octave_scalar, octave_bool, s_b); + INSTALL_CATOP (octave_bool, octave_float_scalar, b_f); + INSTALL_CATOP (octave_float_scalar, octave_bool, f_b); INSTALL_ASSIGNCONV (octave_bool, octave_bool, octave_bool_matrix); } diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-b-bm.cc --- a/src/OPERATORS/op-b-bm.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-b-bm.cc Sun Apr 27 22:34:17 2008 +0200 @@ -30,7 +30,9 @@ #include "ov-bool.h" #include "ov-bool-mat.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" @@ -45,6 +47,9 @@ DEFNDCATOP_FN (b_m, bool, matrix, array, array, concat) DEFNDCATOP_FN (s_bm, scalar, bool_matrix, array, array, concat) +DEFNDCATOP_FN (b_fm, bool, float_matrix, float_array, float_array, concat) +DEFNDCATOP_FN (f_bm, float_scalar, bool_matrix, float_array, float_array, concat) + DEFCONV (bool_matrix_conv, bool, bool_matrix) { CAST_CONV_ARG (const octave_bool&); @@ -61,6 +66,8 @@ INSTALL_CATOP (octave_bool, octave_bool_matrix, b_bm); INSTALL_CATOP (octave_bool, octave_matrix, b_m); INSTALL_CATOP (octave_scalar, octave_bool_matrix, s_bm); + INSTALL_CATOP (octave_bool, octave_float_matrix, b_fm); + INSTALL_CATOP (octave_float_scalar, octave_bool_matrix, f_bm); INSTALL_ASSIGNCONV (octave_bool, octave_bool_matrix, 
octave_bool_matrix); diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-bm-b.cc --- a/src/OPERATORS/op-bm-b.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-bm-b.cc Sun Apr 27 22:34:17 2008 +0200 @@ -30,7 +30,9 @@ #include "ov-bool.h" #include "ov-bool-mat.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-str-mat.h" #include "ov-int8.h" #include "ov-int16.h" @@ -53,6 +55,8 @@ DEFNDCATOP_FN (bm_b, bool_matrix, bool, bool_array, bool_array, concat) DEFNDCATOP_FN (bm_s, bool_matrix, scalar, array, array, concat) DEFNDCATOP_FN (m_b, matrix, bool, array, array, concat) +DEFNDCATOP_FN (bm_f, bool_matrix, float_scalar, float_array, float_array, concat) +DEFNDCATOP_FN (fm_b, float_matrix, bool, float_array, float_array, concat) DEFNDASSIGNOP_FN (assign, bool_matrix, bool, bool_array, assign) @@ -83,6 +87,8 @@ INSTALL_CATOP (octave_bool_matrix, octave_bool, bm_b); INSTALL_CATOP (octave_bool_matrix, octave_scalar, bm_s); INSTALL_CATOP (octave_matrix, octave_bool, m_b); + INSTALL_CATOP (octave_bool_matrix, octave_float_scalar, bm_f); + INSTALL_CATOP (octave_float_matrix, octave_bool, fm_b); INSTALL_ASSIGNOP (op_asn_eq, octave_bool_matrix, octave_bool, assign); diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-bm-bm.cc --- a/src/OPERATORS/op-bm-bm.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-bm-bm.cc Sun Apr 27 22:34:17 2008 +0200 @@ -32,6 +32,7 @@ #include "ov-scalar.h" #include "ov-range.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-re-sparse.h" #include "ov-str-mat.h" #include "ov-int8.h" @@ -80,6 +81,8 @@ DEFNDCATOP_FN (bm_bm, bool_matrix, bool_matrix, bool_array, bool_array, concat) DEFNDCATOP_FN (bm_m, bool_matrix, matrix, array, array, concat) DEFNDCATOP_FN (m_bm, matrix, bool_matrix, array, array, concat) +DEFNDCATOP_FN (bm_fm, bool_matrix, float_matrix, float_array, float_array, concat) +DEFNDCATOP_FN (fm_bm, float_matrix, bool_matrix, float_array, 
float_array, concat) DEFNDASSIGNOP_FN (assign, bool_matrix, bool_matrix, bool_array, assign) @@ -122,6 +125,8 @@ INSTALL_CATOP (octave_bool_matrix, octave_bool_matrix, bm_bm); INSTALL_CATOP (octave_bool_matrix, octave_matrix, bm_m); INSTALL_CATOP (octave_matrix, octave_bool_matrix, m_bm); + INSTALL_CATOP (octave_bool_matrix, octave_float_matrix, bm_fm); + INSTALL_CATOP (octave_float_matrix, octave_bool_matrix, fm_bm); INSTALL_CONVOP (octave_matrix, octave_bool_matrix, matrix_to_bool_matrix); INSTALL_CONVOP (octave_scalar, octave_bool_matrix, scalar_to_bool_matrix); diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-cm-cm.cc --- a/src/OPERATORS/op-cm-cm.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-cm-cm.cc Sun Apr 27 22:34:17 2008 +0200 @@ -29,6 +29,7 @@ #include "oct-obj.h" #include "ov.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" @@ -131,6 +132,13 @@ DEFNDASSIGNOP_FN (assign, complex_matrix, complex_matrix, complex_array, assign) +CONVDECL (complex_matrix_to_float_complex_matrix) +{ + CAST_CONV_ARG (const octave_complex_matrix&); + + return new octave_float_complex_matrix (FloatComplexNDArray (v.complex_array_value ())); +} + void install_cm_cm_ops (void) { @@ -165,6 +173,9 @@ INSTALL_CATOP (octave_complex_matrix, octave_complex_matrix, cm_cm); INSTALL_ASSIGNOP (op_asn_eq, octave_complex_matrix, octave_complex_matrix, assign); + + INSTALL_CONVOP (octave_complex_matrix, octave_float_complex_matrix, + complex_matrix_to_float_complex_matrix); } /* diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-cs-cs.cc --- a/src/OPERATORS/op-cs-cs.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-cs-cs.cc Sun Apr 27 22:34:17 2008 +0200 @@ -30,6 +30,7 @@ #include "ov.h" #include "ov-complex.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" @@ -176,6 +177,13 @@ DEFNDCATOP_FN (cs_cs, complex, complex, complex_array, 
complex_array, concat) +CONVDECL (complex_to_float_complex) +{ + CAST_CONV_ARG (const octave_complex&); + + return new octave_float_complex_matrix (FloatComplexMatrix (1, 1, static_cast(v.complex_value ()))); +} + void install_cs_cs_ops (void) { @@ -210,6 +218,9 @@ INSTALL_CATOP (octave_complex, octave_complex, cs_cs); INSTALL_ASSIGNCONV (octave_complex, octave_complex, octave_complex_matrix); + + INSTALL_CONVOP (octave_complex, octave_float_complex_matrix, + complex_to_float_complex); } /* diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fcm-fcm.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fcm-fcm.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,227 @@ +/* + +Copyright (C) 1996, 1997, 1998, 2000, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// unary complex matrix ops. + +DEFNDUNOP_OP (not, float_complex_matrix, float_complex_array, !) 
+DEFNDUNOP_OP (uplus, float_complex_matrix, float_complex_array, /* no-op */) +DEFNDUNOP_OP (uminus, float_complex_matrix, float_complex_array, -) + +DEFUNOP (transpose, float_complex_matrix) +{ + CAST_UNOP_ARG (const octave_float_complex_matrix&); + + if (v.ndims () > 2) + { + error ("transpose not defined for N-d objects"); + return octave_value (); + } + else + return octave_value (v.float_complex_matrix_value().transpose ()); +} + +DEFUNOP (hermitian, float_complex_matrix) +{ + CAST_UNOP_ARG (const octave_float_complex_matrix&); + + if (v.ndims () > 2) + { + error ("complex-conjugate transpose not defined for N-d objects"); + return octave_value (); + } + else + return octave_value (v.float_complex_matrix_value().hermitian ()); +} + +DEFNCUNOP_METHOD (incr, float_complex_matrix, increment) +DEFNCUNOP_METHOD (decr, float_complex_matrix, decrement) + +// complex matrix by complex matrix ops. + +DEFNDBINOP_OP (add, float_complex_matrix, float_complex_matrix, + float_complex_array, float_complex_array, +) +DEFNDBINOP_OP (sub, float_complex_matrix, float_complex_matrix, + float_complex_array, float_complex_array, -) + +DEFBINOP_OP (mul, float_complex_matrix, float_complex_matrix, *) + +DEFBINOP (div, float_complex_matrix, float_complex_matrix) +{ + CAST_BINOP_ARGS (const octave_float_complex_matrix&, + const octave_float_complex_matrix&); + MatrixType typ = v2.matrix_type (); + + FloatComplexMatrix ret = xdiv (v1.float_complex_matrix_value (), + v2.float_complex_matrix_value (), typ); + + v2.matrix_type (typ); + return ret; +} + +DEFBINOPX (pow, float_complex_matrix, float_complex_matrix) +{ + error ("can't do A ^ B for A and B both matrices"); + return octave_value (); +} + +DEFBINOP (ldiv, float_complex_matrix, float_complex_matrix) +{ + CAST_BINOP_ARGS (const octave_float_complex_matrix&, + const octave_float_complex_matrix&); + MatrixType typ = v1.matrix_type (); + + FloatComplexMatrix ret = xleftdiv (v1.float_complex_matrix_value (), + 
v2.float_complex_matrix_value (), typ); + + v1.matrix_type (typ); + return ret; +} + +DEFNDBINOP_FN (lt, float_complex_matrix, float_complex_matrix, + float_complex_array, float_complex_array, mx_el_lt) +DEFNDBINOP_FN (le, float_complex_matrix, float_complex_matrix, + float_complex_array, float_complex_array, mx_el_le) +DEFNDBINOP_FN (eq, float_complex_matrix, float_complex_matrix, + float_complex_array, float_complex_array, mx_el_eq) +DEFNDBINOP_FN (ge, float_complex_matrix, float_complex_matrix, + float_complex_array, float_complex_array, mx_el_ge) +DEFNDBINOP_FN (gt, float_complex_matrix, float_complex_matrix, + float_complex_array, float_complex_array, mx_el_gt) +DEFNDBINOP_FN (ne, float_complex_matrix, float_complex_matrix, + float_complex_array, float_complex_array, mx_el_ne) + +DEFNDBINOP_FN (el_mul, float_complex_matrix, float_complex_matrix, + float_complex_array, float_complex_array, product) +DEFNDBINOP_FN (el_div, float_complex_matrix, float_complex_matrix, + float_complex_array, float_complex_array, quotient) +DEFNDBINOP_FN (el_pow, float_complex_matrix, float_complex_matrix, + float_complex_array, float_complex_array, elem_xpow) + +DEFBINOP (el_ldiv, float_complex_matrix, float_complex_matrix) +{ + CAST_BINOP_ARGS (const octave_float_complex_matrix&, + const octave_float_complex_matrix&); + + return octave_value (quotient (v2.float_complex_array_value (), v1.float_complex_array_value ())); +} + +DEFNDBINOP_FN (el_and, float_complex_matrix, float_complex_matrix, + float_complex_array, float_complex_array, mx_el_and) +DEFNDBINOP_FN (el_or, float_complex_matrix, float_complex_matrix, + float_complex_array, float_complex_array, mx_el_or) + +DEFNDCATOP_FN (fcm_fcm, float_complex_matrix, float_complex_matrix, + float_complex_array, float_complex_array, concat) + +DEFNDASSIGNOP_FN (assign, float_complex_matrix, float_complex_matrix, + float_complex_array, assign) +DEFNDASSIGNOP_FN (dbl_assign, complex_matrix, float_complex_matrix, + complex_array, assign) + 
+CONVDECL (float_complex_matrix_to_complex_matrix) +{ + CAST_CONV_ARG (const octave_float_complex_matrix&); + + return new octave_complex_matrix (ComplexNDArray (v.float_complex_array_value ())); +} + +void +install_fcm_fcm_ops (void) +{ + INSTALL_UNOP (op_not, octave_float_complex_matrix, not); + INSTALL_UNOP (op_uplus, octave_float_complex_matrix, uplus); + INSTALL_UNOP (op_uminus, octave_float_complex_matrix, uminus); + INSTALL_UNOP (op_transpose, octave_float_complex_matrix, transpose); + INSTALL_UNOP (op_hermitian, octave_float_complex_matrix, hermitian); + + INSTALL_NCUNOP (op_incr, octave_float_complex_matrix, incr); + INSTALL_NCUNOP (op_decr, octave_float_complex_matrix, decr); + + INSTALL_BINOP (op_add, octave_float_complex_matrix, + octave_float_complex_matrix, add); + INSTALL_BINOP (op_sub, octave_float_complex_matrix, + octave_float_complex_matrix, sub); + INSTALL_BINOP (op_mul, octave_float_complex_matrix, + octave_float_complex_matrix, mul); + INSTALL_BINOP (op_div, octave_float_complex_matrix, + octave_float_complex_matrix, div); + INSTALL_BINOP (op_pow, octave_float_complex_matrix, + octave_float_complex_matrix, pow); + INSTALL_BINOP (op_ldiv, octave_float_complex_matrix, + octave_float_complex_matrix, ldiv); + INSTALL_BINOP (op_lt, octave_float_complex_matrix, + octave_float_complex_matrix, lt); + INSTALL_BINOP (op_le, octave_float_complex_matrix, + octave_float_complex_matrix, le); + INSTALL_BINOP (op_eq, octave_float_complex_matrix, + octave_float_complex_matrix, eq); + INSTALL_BINOP (op_ge, octave_float_complex_matrix, + octave_float_complex_matrix, ge); + INSTALL_BINOP (op_gt, octave_float_complex_matrix, + octave_float_complex_matrix, gt); + INSTALL_BINOP (op_ne, octave_float_complex_matrix, + octave_float_complex_matrix, ne); + INSTALL_BINOP (op_el_mul, octave_float_complex_matrix, + octave_float_complex_matrix, el_mul); + INSTALL_BINOP (op_el_div, octave_float_complex_matrix, + octave_float_complex_matrix, el_div); + INSTALL_BINOP 
(op_el_pow, octave_float_complex_matrix, + octave_float_complex_matrix, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_complex_matrix, + octave_float_complex_matrix, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_complex_matrix, + octave_float_complex_matrix, el_and); + INSTALL_BINOP (op_el_or, octave_float_complex_matrix, + octave_float_complex_matrix, el_or); + + INSTALL_CATOP (octave_float_complex_matrix, + octave_float_complex_matrix, fcm_fcm); + + INSTALL_ASSIGNOP (op_asn_eq, octave_float_complex_matrix, + octave_float_complex_matrix, assign); + INSTALL_ASSIGNOP (op_asn_eq, octave_float_complex_matrix, + octave_complex_matrix, dbl_assign); + + INSTALL_CONVOP (octave_float_complex_matrix, octave_complex_matrix, + float_complex_matrix_to_complex_matrix); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fcm-fcs.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fcm-fcs.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,176 @@ +/* + +Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" +#include "ov-flt-complex.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// complex matrix by complex scalar ops. + +DEFNDBINOP_OP (add, float_complex_matrix, float_complex, + float_complex_array, float_complex, +) +DEFNDBINOP_OP (sub, float_complex_matrix, float_complex, + float_complex_array, float_complex, -) +DEFNDBINOP_OP (mul, float_complex_matrix, float_complex, + float_complex_array, float_complex, *) + +DEFBINOP (div, float_complex_matrix, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex_matrix&, + const octave_float_complex&); + + FloatComplex d = v2.float_complex_value (); + + if (d == static_cast(0.0)) + gripe_divide_by_zero (); + + return octave_value (v1.float_complex_array_value () / d); +} + +DEFBINOP_FN (pow, float_complex_matrix, float_complex, xpow) + +DEFBINOP (ldiv, float_complex_matrix, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex_matrix&, + const octave_float_complex&); + + FloatComplexMatrix m1 = v1.float_complex_matrix_value (); + FloatComplexMatrix m2 = v2.float_complex_matrix_value (); + MatrixType typ = v1.matrix_type (); + + FloatComplexMatrix ret = xleftdiv (m1, m2, typ); + v1.matrix_type (typ); + return ret; +} + +DEFNDBINOP_FN (lt, float_complex_matrix, float_complex, + float_complex_array, float_complex, mx_el_lt) +DEFNDBINOP_FN (le, float_complex_matrix, float_complex, + float_complex_array, float_complex, mx_el_le) +DEFNDBINOP_FN (eq, float_complex_matrix, float_complex, + float_complex_array, float_complex, mx_el_eq) +DEFNDBINOP_FN (ge, float_complex_matrix, float_complex, + float_complex_array, float_complex, mx_el_ge) +DEFNDBINOP_FN (gt, float_complex_matrix, float_complex, + float_complex_array, float_complex, mx_el_gt) +DEFNDBINOP_FN (ne, float_complex_matrix, float_complex, + 
float_complex_array, float_complex, mx_el_ne) + +DEFNDBINOP_OP (el_mul, float_complex_matrix, float_complex, + float_complex_array, float_complex, *) + +DEFBINOP (el_div, float_complex_matrix, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex_matrix&, + const octave_float_complex&); + + FloatComplex d = v2.float_complex_value (); + + if (d == static_cast(0.0)) + gripe_divide_by_zero (); + + return octave_value (v1.float_complex_array_value () / d); +} + +DEFNDBINOP_FN (el_pow, float_complex_matrix, float_complex, + float_complex_array, float_complex, elem_xpow) + +DEFBINOP (el_ldiv, float_complex_matrix, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex_matrix&, + const octave_float_complex&); + + return x_el_div (v2.float_complex_value (), v1.float_complex_array_value ()); +} + +DEFNDBINOP_FN (el_and, float_complex_matrix, float_complex, + float_complex_array, float_complex, mx_el_and) +DEFNDBINOP_FN (el_or, float_complex_matrix, float_complex, + float_complex_array, float_complex, mx_el_or) + +DEFNDCATOP_FN (fcm_fcs, float_complex_matrix, float_complex, + float_complex_array, float_complex_array, concat) + +DEFNDASSIGNOP_FN (assign, float_complex_matrix, float_complex, + float_complex_array, assign) +DEFNDASSIGNOP_FN (dbl_assign, complex_matrix, float_complex, + complex_array, assign) + +void +install_fcm_fcs_ops (void) +{ + INSTALL_BINOP (op_add, octave_float_complex_matrix, + octave_float_complex, add); + INSTALL_BINOP (op_sub, octave_float_complex_matrix, + octave_float_complex, sub); + INSTALL_BINOP (op_mul, octave_float_complex_matrix, + octave_float_complex, mul); + INSTALL_BINOP (op_div, octave_float_complex_matrix, + octave_float_complex, div); + INSTALL_BINOP (op_pow, octave_float_complex_matrix, + octave_float_complex, pow); + INSTALL_BINOP (op_ldiv, octave_float_complex_matrix, + octave_float_complex, ldiv); + INSTALL_BINOP (op_lt, octave_float_complex_matrix, octave_float_complex, lt); + INSTALL_BINOP (op_le, 
octave_float_complex_matrix, octave_float_complex, le); + INSTALL_BINOP (op_eq, octave_float_complex_matrix, octave_float_complex, eq); + INSTALL_BINOP (op_ge, octave_float_complex_matrix, octave_float_complex, ge); + INSTALL_BINOP (op_gt, octave_float_complex_matrix, octave_float_complex, gt); + INSTALL_BINOP (op_ne, octave_float_complex_matrix, octave_float_complex, ne); + INSTALL_BINOP (op_el_mul, octave_float_complex_matrix, + octave_float_complex, el_mul); + INSTALL_BINOP (op_el_div, octave_float_complex_matrix, + octave_float_complex, el_div); + INSTALL_BINOP (op_el_pow, octave_float_complex_matrix, + octave_float_complex, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_complex_matrix, + octave_float_complex, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_complex_matrix, + octave_float_complex, el_and); + INSTALL_BINOP (op_el_or, octave_float_complex_matrix, + octave_float_complex, el_or); + + INSTALL_CATOP (octave_float_complex_matrix, octave_float_complex, fcm_fcs); + + INSTALL_ASSIGNOP (op_asn_eq, octave_float_complex_matrix, + octave_float_complex, assign); + INSTALL_ASSIGNOP (op_asn_eq, octave_complex_matrix, + octave_float_complex, dbl_assign); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fcm-fm.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fcm-fm.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,166 @@ +/* + +Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "mx-fcm-fm.h" +#include "mx-fm-fcm.h" +#include "mx-fcnda-fnda.h" +#include "mx-fnda-fcnda.h" + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" +#include "ov-flt-re-mat.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// complex matrix by matrix ops. + +DEFNDBINOP_OP (add, float_complex_matrix, float_matrix, float_complex_array, float_array, +) +DEFNDBINOP_OP (sub, float_complex_matrix, float_matrix, float_complex_array, float_array, -) + +DEFBINOP_OP (mul, float_complex_matrix, float_matrix, *) + +DEFBINOP (div, float_complex_matrix, float_matrix) +{ + CAST_BINOP_ARGS (const octave_float_complex_matrix&, + const octave_float_matrix&); + MatrixType typ = v2.matrix_type (); + + FloatComplexMatrix ret = xdiv (v1.float_complex_matrix_value (), + v2.float_matrix_value (), typ); + + v2.matrix_type (typ); + return ret; +} + + +DEFBINOPX (pow, float_complex_matrix, float_matrix) +{ + error ("can't do A ^ B for A and B both matrices"); + return octave_value (); +} + +DEFBINOP (ldiv, float_complex_matrix, float_matrix) +{ + CAST_BINOP_ARGS (const octave_float_complex_matrix&, + const octave_float_matrix&); + MatrixType typ = v1.matrix_type (); + + FloatComplexMatrix ret = xleftdiv (v1.float_complex_matrix_value (), + v2.float_matrix_value (), typ); + + v1.matrix_type (typ); + return ret; +} + +DEFNDBINOP_FN (lt, float_complex_matrix, float_matrix, + float_complex_array, float_array, mx_el_lt) +DEFNDBINOP_FN (le, float_complex_matrix, float_matrix, + float_complex_array, float_array, mx_el_le) +DEFNDBINOP_FN (eq, float_complex_matrix, float_matrix, + float_complex_array, float_array, mx_el_eq) +DEFNDBINOP_FN (ge, float_complex_matrix, 
float_matrix, + float_complex_array, float_array, mx_el_ge) +DEFNDBINOP_FN (gt, float_complex_matrix, float_matrix, + float_complex_array, float_array, mx_el_gt) +DEFNDBINOP_FN (ne, float_complex_matrix, float_matrix, + float_complex_array, float_array, mx_el_ne) + +DEFNDBINOP_FN (el_mul, float_complex_matrix, float_matrix, + float_complex_array, float_array, product) +DEFNDBINOP_FN (el_div, float_complex_matrix, float_matrix, + float_complex_array, float_array, quotient) +DEFNDBINOP_FN (el_pow, float_complex_matrix, float_matrix, + float_complex_array, float_array, elem_xpow) + +DEFBINOP (el_ldiv, float_complex_matrix, float_matrix) +{ + CAST_BINOP_ARGS (const octave_float_complex_matrix&, + const octave_float_matrix&); + + return quotient (v2.float_array_value (), v1.float_complex_array_value ()); +} + +DEFNDBINOP_FN (el_and, float_complex_matrix, float_matrix, + float_complex_array, float_array, mx_el_and) +DEFNDBINOP_FN (el_or, float_complex_matrix, float_matrix, + float_complex_array, float_array, mx_el_or) + +DEFNDCATOP_FN (fcm_fm, float_complex_matrix, float_matrix, + float_complex_array, float_array, concat) + +DEFNDASSIGNOP_FN (assign, float_complex_matrix, float_matrix, + float_complex_array, assign) +DEFNDASSIGNOP_FN (dbl_assign, complex_matrix, float_matrix, + complex_array, assign) + +void +install_fcm_fm_ops (void) +{ + INSTALL_BINOP (op_add, octave_float_complex_matrix, octave_float_matrix, add); + INSTALL_BINOP (op_sub, octave_float_complex_matrix, octave_float_matrix, sub); + INSTALL_BINOP (op_mul, octave_float_complex_matrix, octave_float_matrix, mul); + INSTALL_BINOP (op_div, octave_float_complex_matrix, octave_float_matrix, div); + INSTALL_BINOP (op_pow, octave_float_complex_matrix, octave_float_matrix, pow); + INSTALL_BINOP (op_ldiv, octave_float_complex_matrix, + octave_float_matrix, ldiv); + INSTALL_BINOP (op_lt, octave_float_complex_matrix, octave_float_matrix, lt); + INSTALL_BINOP (op_le, octave_float_complex_matrix, octave_float_matrix, 
le); + INSTALL_BINOP (op_eq, octave_float_complex_matrix, octave_float_matrix, eq); + INSTALL_BINOP (op_ge, octave_float_complex_matrix, octave_float_matrix, ge); + INSTALL_BINOP (op_gt, octave_float_complex_matrix, octave_float_matrix, gt); + INSTALL_BINOP (op_ne, octave_float_complex_matrix, octave_float_matrix, ne); + INSTALL_BINOP (op_el_mul, octave_float_complex_matrix, + octave_float_matrix, el_mul); + INSTALL_BINOP (op_el_div, octave_float_complex_matrix, + octave_float_matrix, el_div); + INSTALL_BINOP (op_el_pow, octave_float_complex_matrix, + octave_float_matrix, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_complex_matrix, + octave_float_matrix, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_complex_matrix, + octave_float_matrix, el_and); + INSTALL_BINOP (op_el_or, octave_float_complex_matrix, + octave_float_matrix, el_or); + + INSTALL_CATOP (octave_float_complex_matrix, octave_float_matrix, fcm_fm); + + INSTALL_ASSIGNOP (op_asn_eq, octave_float_complex_matrix, + octave_float_matrix, assign); + INSTALL_ASSIGNOP (op_asn_eq, octave_complex_matrix, + octave_float_matrix, dbl_assign); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fcm-fs.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fcm-fs.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,160 @@ +/* + +Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "mx-cm-s.h" +#include "mx-cnda-s.h" + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" +#include "ov-flt-re-mat.h" +#include "ov-float.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// complex matrix by scalar ops. + +DEFNDBINOP_OP (add, float_complex_matrix, float_scalar, float_complex_array, float_scalar, +) +DEFNDBINOP_OP (sub, float_complex_matrix, float_scalar, float_complex_array, float_scalar, -) +DEFNDBINOP_OP (mul, float_complex_matrix, float_scalar, float_complex_array, float_scalar, *) + +DEFBINOP (div, float_complex_matrix, float) +{ + CAST_BINOP_ARGS (const octave_float_complex_matrix&, const octave_float_scalar&); + + float d = v2.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v1.float_complex_array_value () / d); +} + +DEFBINOP_FN (pow, float_complex_matrix, float_scalar, xpow) + +DEFBINOP (ldiv, float_complex_matrix, float) +{ + CAST_BINOP_ARGS (const octave_float_complex_matrix&, const octave_float_scalar&); + + FloatComplexMatrix m1 = v1.float_complex_matrix_value (); + FloatMatrix m2 = v2.float_matrix_value (); + MatrixType typ = v1.matrix_type (); + + FloatComplexMatrix ret = xleftdiv (m1, m2, typ); + + v1.matrix_type (typ); + return ret; +} + +DEFNDBINOP_FN (lt, float_complex_matrix, float_scalar, float_complex_array, + float_scalar, mx_el_lt) +DEFNDBINOP_FN (le, float_complex_matrix, float_scalar, float_complex_array, + float_scalar, mx_el_le) +DEFNDBINOP_FN (eq, float_complex_matrix, float_scalar, float_complex_array, + float_scalar, mx_el_eq) +DEFNDBINOP_FN (ge, float_complex_matrix, float_scalar, float_complex_array, + float_scalar, mx_el_ge) +DEFNDBINOP_FN (gt, float_complex_matrix, float_scalar, float_complex_array, 
+ float_scalar, mx_el_gt) +DEFNDBINOP_FN (ne, float_complex_matrix, float_scalar, float_complex_array, + float_scalar, mx_el_ne) + +DEFNDBINOP_OP (el_mul, float_complex_matrix, float_scalar, float_complex_array, + float_scalar, *) + +DEFBINOP (el_div, float_complex_matrix, float) +{ + CAST_BINOP_ARGS (const octave_float_complex_matrix&, const octave_float_scalar&); + + float d = v2.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v1.float_complex_array_value () / d); +} + +DEFNDBINOP_FN (el_pow, float_complex_matrix, float_scalar, float_complex_array, + float_scalar, elem_xpow) + +DEFBINOP (el_ldiv, float_complex_matrix, float) +{ + CAST_BINOP_ARGS (const octave_float_complex_matrix&, const octave_float_scalar&); + + return x_el_div (v2.float_value (), v1.float_complex_array_value ()); +} + +DEFNDBINOP_FN (el_and, float_complex_matrix, float_scalar, float_complex_array, + float_scalar, mx_el_and) +DEFNDBINOP_FN (el_or, float_complex_matrix, float_scalar, float_complex_array, + float_scalar, mx_el_or) + +DEFNDCATOP_FN (fcm_fs, float_complex_matrix, float_scalar, float_complex_array, + float_array, concat) + +DEFNDASSIGNOP_FN (assign, float_complex_matrix, float_scalar, float_complex_array, assign) +DEFNDASSIGNOP_FN (dbl_assign, complex_matrix, float_scalar, complex_array, assign) + +void +install_fcm_fs_ops (void) +{ + INSTALL_BINOP (op_add, octave_float_complex_matrix, octave_float_scalar, add); + INSTALL_BINOP (op_sub, octave_float_complex_matrix, octave_float_scalar, sub); + INSTALL_BINOP (op_mul, octave_float_complex_matrix, octave_float_scalar, mul); + INSTALL_BINOP (op_div, octave_float_complex_matrix, octave_float_scalar, div); + INSTALL_BINOP (op_pow, octave_float_complex_matrix, octave_float_scalar, pow); + INSTALL_BINOP (op_ldiv, octave_float_complex_matrix, octave_float_scalar, ldiv); + INSTALL_BINOP (op_lt, octave_float_complex_matrix, octave_float_scalar, lt); + INSTALL_BINOP (op_le, octave_float_complex_matrix, 
octave_float_scalar, le); + INSTALL_BINOP (op_eq, octave_float_complex_matrix, octave_float_scalar, eq); + INSTALL_BINOP (op_ge, octave_float_complex_matrix, octave_float_scalar, ge); + INSTALL_BINOP (op_gt, octave_float_complex_matrix, octave_float_scalar, gt); + INSTALL_BINOP (op_ne, octave_float_complex_matrix, octave_float_scalar, ne); + INSTALL_BINOP (op_el_mul, octave_float_complex_matrix, octave_float_scalar, el_mul); + INSTALL_BINOP (op_el_div, octave_float_complex_matrix, octave_float_scalar, el_div); + INSTALL_BINOP (op_el_pow, octave_float_complex_matrix, octave_float_scalar, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_complex_matrix, octave_float_scalar, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_complex_matrix, octave_float_scalar, el_and); + INSTALL_BINOP (op_el_or, octave_float_complex_matrix, octave_float_scalar, el_or); + + INSTALL_CATOP (octave_float_complex_matrix, octave_float_scalar, fcm_fs); + + INSTALL_ASSIGNOP (op_asn_eq, octave_float_complex_matrix, + octave_float_scalar, assign); + INSTALL_ASSIGNOP (op_asn_eq, octave_complex_matrix, + octave_float_scalar, dbl_assign); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fcs-fcm.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fcs-fcm.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,153 @@ +/* + +Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-complex.h" +#include "ov-flt-complex.h" +#include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// complex scalar by complex matrix ops. + +DEFNDBINOP_OP (add, float_complex, float_complex_matrix, float_complex, float_complex_array, +) +DEFNDBINOP_OP (sub, float_complex, float_complex_matrix, float_complex, float_complex_array, -) +DEFNDBINOP_OP (mul, float_complex, float_complex_matrix, float_complex, float_complex_array, *) + +DEFBINOP (div, float_complex, float_complex_matrix) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex_matrix&); + + FloatComplexMatrix m1 = v1.float_complex_matrix_value (); + FloatComplexMatrix m2 = v2.float_complex_matrix_value (); + MatrixType typ = v2.matrix_type (); + + FloatComplexMatrix ret = xdiv (m1, m2, typ); + + v2.matrix_type (typ); + return ret; +} + +DEFBINOP_FN (pow, float_complex, float_complex_matrix, xpow) + +DEFBINOP (ldiv, float_complex, float_complex_matrix) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex_matrix&); + + FloatComplex d = v1.float_complex_value (); + + if (d == static_cast(0.0)) + gripe_divide_by_zero (); + + return octave_value (v2.float_complex_array_value () / d); +} + +DEFNDBINOP_FN (lt, float_complex, float_complex_matrix, float_complex, + float_complex_array, mx_el_lt) +DEFNDBINOP_FN (le, float_complex, float_complex_matrix, float_complex, + float_complex_array, mx_el_le) +DEFNDBINOP_FN (eq, float_complex, float_complex_matrix, float_complex, + float_complex_array, mx_el_eq) +DEFNDBINOP_FN (ge, float_complex, float_complex_matrix, float_complex, + float_complex_array, mx_el_ge) +DEFNDBINOP_FN (gt, float_complex, 
float_complex_matrix, float_complex, + float_complex_array, mx_el_gt) +DEFNDBINOP_FN (ne, float_complex, float_complex_matrix, float_complex, + float_complex_array, mx_el_ne) + +DEFNDBINOP_OP (el_mul, float_complex, float_complex_matrix, float_complex, + float_complex_array, *) +DEFNDBINOP_FN (el_div, float_complex, float_complex_matrix, float_complex, + float_complex_array, x_el_div) +DEFNDBINOP_FN (el_pow, float_complex, float_complex_matrix, float_complex, + float_complex_array, elem_xpow) + +DEFBINOP (el_ldiv, float_complex, float_complex_matrix) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex_matrix&); + + FloatComplex d = v1.float_complex_value (); + + if (d == static_cast(0.0)) + gripe_divide_by_zero (); + + return octave_value (v2.float_complex_array_value () / d); +} + +DEFNDBINOP_FN (el_and, float_complex, float_complex_matrix, float_complex, float_complex_array, mx_el_and) +DEFNDBINOP_FN (el_or, float_complex, float_complex_matrix, float_complex, float_complex_array, mx_el_or) + +DEFNDCATOP_FN (fcs_fcm, float_complex, float_complex_matrix, float_complex_array, float_complex_array, concat) + +DEFCONV (float_complex_matrix_conv, float_complex, float_complex_matrix) +{ + CAST_CONV_ARG (const octave_float_complex&); + + return new octave_float_complex_matrix (v.float_complex_matrix_value ()); +} + +void +install_fcs_fcm_ops (void) +{ + INSTALL_BINOP (op_add, octave_float_complex, octave_float_complex_matrix, add); + INSTALL_BINOP (op_sub, octave_float_complex, octave_float_complex_matrix, sub); + INSTALL_BINOP (op_mul, octave_float_complex, octave_float_complex_matrix, mul); + INSTALL_BINOP (op_div, octave_float_complex, octave_float_complex_matrix, div); + INSTALL_BINOP (op_pow, octave_float_complex, octave_float_complex_matrix, pow); + INSTALL_BINOP (op_ldiv, octave_float_complex, octave_float_complex_matrix, ldiv); + INSTALL_BINOP (op_lt, octave_float_complex, octave_float_complex_matrix, lt); + INSTALL_BINOP (op_le, 
octave_float_complex, octave_float_complex_matrix, le); + INSTALL_BINOP (op_eq, octave_float_complex, octave_float_complex_matrix, eq); + INSTALL_BINOP (op_ge, octave_float_complex, octave_float_complex_matrix, ge); + INSTALL_BINOP (op_gt, octave_float_complex, octave_float_complex_matrix, gt); + INSTALL_BINOP (op_ne, octave_float_complex, octave_float_complex_matrix, ne); + INSTALL_BINOP (op_el_mul, octave_float_complex, octave_float_complex_matrix, el_mul); + INSTALL_BINOP (op_el_div, octave_float_complex, octave_float_complex_matrix, el_div); + INSTALL_BINOP (op_el_pow, octave_float_complex, octave_float_complex_matrix, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_complex, octave_float_complex_matrix, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_complex, octave_float_complex_matrix, el_and); + INSTALL_BINOP (op_el_or, octave_float_complex, octave_float_complex_matrix, el_or); + + INSTALL_CATOP (octave_float_complex, octave_float_complex_matrix, fcs_fcm); + + INSTALL_ASSIGNCONV (octave_float_complex, octave_float_complex_matrix, octave_float_complex_matrix); + + INSTALL_ASSIGNCONV (octave_complex, octave_float_complex_matrix, octave_complex_matrix); + + INSTALL_WIDENOP (octave_float_complex, octave_float_complex_matrix, float_complex_matrix_conv); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fcs-fcs.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fcs-fcs.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,235 @@ +/* + +Copyright (C) 1996, 1997, 1998, 2000, 2002, 2003, 2004, 2005, 2007 + John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. 
+ +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-complex.h" +#include "ov-flt-complex.h" +#include "ov-flt-cx-mat.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// unary complex scalar ops. + +DEFUNOP (not, float_complex) +{ + CAST_UNOP_ARG (const octave_float_complex&); + + return octave_value (v.float_complex_value () == 0.0); +} + +DEFUNOP_OP (uplus, float_complex, /* no-op */) +DEFUNOP_OP (uminus, float_complex, -) +DEFUNOP_OP (transpose, float_complex, /* no-op */) + +DEFUNOP (hermitian, float_complex) +{ + CAST_UNOP_ARG (const octave_float_complex&); + + return octave_value (conj (v.float_complex_value ())); +} + +DEFNCUNOP_METHOD (incr, float_complex, increment) +DEFNCUNOP_METHOD (decr, float_complex, decrement) + +// complex scalar by complex scalar ops. 
+ +DEFBINOP_OP (add, float_complex, float_complex, +) +DEFBINOP_OP (sub, float_complex, float_complex, -) +DEFBINOP_OP (mul, float_complex, float_complex, *) + +DEFBINOP (div, float_complex, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex&); + + FloatComplex d = v2.float_complex_value (); + + if (d == static_cast(0.0)) + gripe_divide_by_zero (); + + return octave_value (v1.float_complex_value () / d); +} + +DEFBINOP_FN (pow, float_complex, float_complex, xpow) + +DEFBINOP (ldiv, float_complex, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex&); + + FloatComplex d = v1.float_complex_value (); + + if (d == static_cast(0.0)) + gripe_divide_by_zero (); + + return octave_value (v2.float_complex_value () / d); +} + +DEFBINOP (lt, float_complex, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex&); + + return real (v1.float_complex_value ()) < real (v2.float_complex_value ()); +} + +DEFBINOP (le, float_complex, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex&); + + return real (v1.float_complex_value ()) <= real (v2.float_complex_value ()); +} + +DEFBINOP (eq, float_complex, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex&); + + return v1.float_complex_value () == v2.float_complex_value (); +} + +DEFBINOP (ge, float_complex, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex&); + + return real (v1.float_complex_value ()) >= real (v2.float_complex_value ()); +} + +DEFBINOP (gt, float_complex, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex&); + + return real (v1.float_complex_value ()) > real (v2.float_complex_value ()); +} + +DEFBINOP (ne, float_complex, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex&); + + return 
v1.float_complex_value () != v2.float_complex_value (); +} + +DEFBINOP_OP (el_mul, float_complex, float_complex, *) + +DEFBINOP (el_div, float_complex, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex&); + + FloatComplex d = v2.float_complex_value (); + + if (d == static_cast(0.0)) + gripe_divide_by_zero (); + + return octave_value (v1.float_complex_value () / d); +} + +DEFBINOP_FN (el_pow, float_complex, float_complex, xpow) + +DEFBINOP (el_ldiv, float_complex, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex&); + + FloatComplex d = v1.float_complex_value (); + + if (d == static_cast(0.0)) + gripe_divide_by_zero (); + + return octave_value (v2.float_complex_value () / d); +} + +DEFBINOP (el_and, float_complex, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex&); + + return (v1.float_complex_value () != static_cast(0.0) && + v2.float_complex_value () != static_cast(0.0)); +} + +DEFBINOP (el_or, float_complex, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_complex&); + + return (v1.float_complex_value () != static_cast(0.0) || + v2.float_complex_value () != static_cast(0.0)); +} + +DEFNDCATOP_FN (fcs_fcs, float_complex, float_complex, float_complex_array, + float_complex_array, concat) + +CONVDECL (float_complex_to_complex) +{ + CAST_CONV_ARG (const octave_float_complex&); + + return new octave_complex_matrix (ComplexMatrix (1, 1, static_cast(v.float_complex_value ()))); +} + +void +install_fcs_fcs_ops (void) +{ + INSTALL_UNOP (op_not, octave_float_complex, not); + INSTALL_UNOP (op_uplus, octave_float_complex, uplus); + INSTALL_UNOP (op_uminus, octave_float_complex, uminus); + INSTALL_UNOP (op_transpose, octave_float_complex, transpose); + INSTALL_UNOP (op_hermitian, octave_float_complex, hermitian); + + INSTALL_NCUNOP (op_incr, octave_float_complex, incr); + INSTALL_NCUNOP (op_decr, 
octave_float_complex, decr); + + INSTALL_BINOP (op_add, octave_float_complex, octave_float_complex, add); + INSTALL_BINOP (op_sub, octave_float_complex, octave_float_complex, sub); + INSTALL_BINOP (op_mul, octave_float_complex, octave_float_complex, mul); + INSTALL_BINOP (op_div, octave_float_complex, octave_float_complex, div); + INSTALL_BINOP (op_pow, octave_float_complex, octave_float_complex, pow); + INSTALL_BINOP (op_ldiv, octave_float_complex, octave_float_complex, ldiv); + INSTALL_BINOP (op_lt, octave_float_complex, octave_float_complex, lt); + INSTALL_BINOP (op_le, octave_float_complex, octave_float_complex, le); + INSTALL_BINOP (op_eq, octave_float_complex, octave_float_complex, eq); + INSTALL_BINOP (op_ge, octave_float_complex, octave_float_complex, ge); + INSTALL_BINOP (op_gt, octave_float_complex, octave_float_complex, gt); + INSTALL_BINOP (op_ne, octave_float_complex, octave_float_complex, ne); + INSTALL_BINOP (op_el_mul, octave_float_complex, octave_float_complex, el_mul); + INSTALL_BINOP (op_el_div, octave_float_complex, octave_float_complex, el_div); + INSTALL_BINOP (op_el_pow, octave_float_complex, octave_float_complex, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_complex, octave_float_complex, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_complex, octave_float_complex, el_and); + INSTALL_BINOP (op_el_or, octave_float_complex, octave_float_complex, el_or); + + INSTALL_CATOP (octave_float_complex, octave_float_complex, fcs_fcs); + + INSTALL_ASSIGNCONV (octave_float_complex, octave_float_complex, octave_float_complex_matrix); + + INSTALL_ASSIGNCONV (octave_complex, octave_float_complex, octave_complex_matrix); + + INSTALL_CONVOP (octave_float_complex, octave_complex_matrix, + float_complex_to_complex); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fcs-fm.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fcs-fm.cc Sun Apr 27 22:34:17 2008 
+0200 @@ -0,0 +1,154 @@ +/* + +Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "mx-cs-nda.h" +#include "mx-nda-cs.h" +#include "mx-cs-nda.h" +#include "mx-nda-cs.h" + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-complex.h" +#include "ov-flt-complex.h" +#include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" +#include "ov-flt-re-mat.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// complex scalar by matrix ops. 
+ +DEFNDBINOP_OP (add, float_complex, float_matrix, float_complex, float_array, +) +DEFNDBINOP_OP (sub, float_complex, float_matrix, float_complex, float_array, -) +DEFNDBINOP_OP (mul, float_complex, float_matrix, float_complex, float_array, *) + +DEFBINOP (div, float_complex, float_matrix) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_matrix&); + + FloatComplexMatrix m1 = v1.float_complex_matrix_value (); + FloatMatrix m2 = v2.float_matrix_value (); + MatrixType typ = v2.matrix_type (); + + FloatComplexMatrix ret = xdiv (m1, m2, typ); + + v2.matrix_type (typ); + return ret; +} + +DEFBINOP_FN (pow, float_complex, float_matrix, xpow) + +DEFBINOP (ldiv, float_complex, float_matrix) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_matrix&); + + FloatComplex d = v1.float_complex_value (); + + if (d == static_cast(0.0)) + gripe_divide_by_zero (); + + return octave_value (v2.float_array_value () / d); +} + +DEFNDBINOP_FN (lt, float_complex, float_matrix, float_complex, + float_array, mx_el_lt) +DEFNDBINOP_FN (le, float_complex, float_matrix, float_complex, + float_array, mx_el_le) +DEFNDBINOP_FN (eq, float_complex, float_matrix, float_complex, + float_array, mx_el_eq) +DEFNDBINOP_FN (ge, float_complex, float_matrix, float_complex, + float_array, mx_el_ge) +DEFNDBINOP_FN (gt, float_complex, float_matrix, float_complex, + float_array, mx_el_gt) +DEFNDBINOP_FN (ne, float_complex, float_matrix, float_complex, + float_array, mx_el_ne) + +DEFNDBINOP_OP (el_mul, float_complex, float_matrix, float_complex, + float_array, *) +DEFNDBINOP_FN (el_div, float_complex, float_matrix, float_complex, + float_array, x_el_div) +DEFNDBINOP_FN (el_pow, float_complex, float_matrix, float_complex, + float_array, elem_xpow) + +DEFBINOP (el_ldiv, float_complex, float_matrix) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_matrix&); + + FloatComplex d = v1.float_complex_value (); + + if (d == static_cast(0.0)) + 
gripe_divide_by_zero (); + + return octave_value (v2.float_array_value () / d); +} + +DEFNDBINOP_FN (el_and, float_complex, float_matrix, float_complex, + float_array, mx_el_and) +DEFNDBINOP_FN (el_or, float_complex, float_matrix, float_complex, + float_array, mx_el_or) + +DEFNDCATOP_FN (fcs_fm, float_complex, float_matrix, float_complex_array, + float_array, concat) + +void +install_fcs_fm_ops (void) +{ + INSTALL_BINOP (op_add, octave_float_complex, octave_float_matrix, add); + INSTALL_BINOP (op_sub, octave_float_complex, octave_float_matrix, sub); + INSTALL_BINOP (op_mul, octave_float_complex, octave_float_matrix, mul); + INSTALL_BINOP (op_div, octave_float_complex, octave_float_matrix, div); + INSTALL_BINOP (op_pow, octave_float_complex, octave_float_matrix, pow); + INSTALL_BINOP (op_ldiv, octave_float_complex, octave_float_matrix, ldiv); + INSTALL_BINOP (op_lt, octave_float_complex, octave_float_matrix, lt); + INSTALL_BINOP (op_le, octave_float_complex, octave_float_matrix, le); + INSTALL_BINOP (op_eq, octave_float_complex, octave_float_matrix, eq); + INSTALL_BINOP (op_ge, octave_float_complex, octave_float_matrix, ge); + INSTALL_BINOP (op_gt, octave_float_complex, octave_float_matrix, gt); + INSTALL_BINOP (op_ne, octave_float_complex, octave_float_matrix, ne); + INSTALL_BINOP (op_el_mul, octave_float_complex, octave_float_matrix, el_mul); + INSTALL_BINOP (op_el_div, octave_float_complex, octave_float_matrix, el_div); + INSTALL_BINOP (op_el_pow, octave_float_complex, octave_float_matrix, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_complex, octave_float_matrix, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_complex, octave_float_matrix, el_and); + INSTALL_BINOP (op_el_or, octave_float_complex, octave_float_matrix, el_or); + + INSTALL_CATOP (octave_float_complex, octave_float_matrix, fcs_fm); + + INSTALL_ASSIGNCONV (octave_float_complex, octave_float_matrix, + octave_float_complex_matrix); + INSTALL_ASSIGNCONV (octave_complex, octave_float_matrix, + 
octave_complex_matrix); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fcs-fs.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fcs-fs.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,196 @@ +/* + +Copyright (C) 1996, 1997, 2000, 2002, 2003, 2004, 2005, 2007 + John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-complex.h" +#include "ov-flt-complex.h" +#include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" +#include "ov-float.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// complex scalar by scalar ops. 
+ +DEFBINOP_OP (add, float_complex, float_scalar, +) +DEFBINOP_OP (sub, float_complex, float_scalar, -) +DEFBINOP_OP (mul, float_complex, float_scalar, *) + +DEFBINOP (div, float_complex, float) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_scalar&); + + float d = v2.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v1.float_complex_value () / d); +} + +DEFBINOP_FN (pow, float_complex, float_scalar, xpow) + +DEFBINOP (ldiv, float_complex, float) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_scalar&); + + FloatComplex d = v1.float_complex_value (); + + if (d == static_cast(0.0)) + gripe_divide_by_zero (); + + return octave_value (v2.float_value () / d); +} + +DEFBINOP (lt, float_complex, float) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_scalar&); + + return real (v1.float_complex_value ()) < v2.float_value (); +} + +DEFBINOP (le, float_complex, float) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_scalar&); + + return real (v1.float_complex_value ()) <= v2.float_value (); +} + +DEFBINOP (eq, float_complex, float) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_scalar&); + + return v1.float_complex_value () == v2.float_value (); +} + +DEFBINOP (ge, float_complex, float) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_scalar&); + + return real (v1.float_complex_value ()) >= v2.float_value (); +} + +DEFBINOP (gt, float_complex, float) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_scalar&); + + return real (v1.float_complex_value ()) > v2.float_value (); +} + +DEFBINOP (ne, float_complex, float) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_scalar&); + + return v1.float_complex_value () != v2.float_value (); +} + +DEFBINOP_OP (el_mul, float_complex, float_scalar, *) + +DEFBINOP (el_div, float_complex, float) +{ + CAST_BINOP_ARGS (const 
octave_float_complex&, const octave_float_scalar&); + + float d = v2.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v1.float_complex_value () / d); +} + +DEFBINOP_FN (el_pow, float_complex, float_scalar, xpow) + +DEFBINOP (el_ldiv, float_complex, float) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_scalar&); + + FloatComplex d = v1.float_complex_value (); + + if (d == static_cast(0.0)) + gripe_divide_by_zero (); + + return octave_value (v2.float_value () / d); +} + +DEFBINOP (el_and, float_complex, float) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_scalar&); + + return (v1.float_complex_value () != static_cast(0.0) && + v2.float_value ()); +} + +DEFBINOP (el_or, float_complex, float) +{ + CAST_BINOP_ARGS (const octave_float_complex&, const octave_float_scalar&); + + return (v1.float_complex_value () != static_cast(0.0) || + v2.float_value ()); +} + +DEFNDCATOP_FN (fcs_fs, float_complex, float_scalar, float_complex_array, + float_array, concat) + +void +install_fcs_fs_ops (void) +{ + INSTALL_BINOP (op_add, octave_float_complex, octave_float_scalar, add); + INSTALL_BINOP (op_sub, octave_float_complex, octave_float_scalar, sub); + INSTALL_BINOP (op_mul, octave_float_complex, octave_float_scalar, mul); + INSTALL_BINOP (op_div, octave_float_complex, octave_float_scalar, div); + INSTALL_BINOP (op_pow, octave_float_complex, octave_float_scalar, pow); + INSTALL_BINOP (op_ldiv, octave_float_complex, octave_float_scalar, ldiv); + INSTALL_BINOP (op_lt, octave_float_complex, octave_float_scalar, lt); + INSTALL_BINOP (op_le, octave_float_complex, octave_float_scalar, le); + INSTALL_BINOP (op_eq, octave_float_complex, octave_float_scalar, eq); + INSTALL_BINOP (op_ge, octave_float_complex, octave_float_scalar, ge); + INSTALL_BINOP (op_gt, octave_float_complex, octave_float_scalar, gt); + INSTALL_BINOP (op_ne, octave_float_complex, octave_float_scalar, ne); + INSTALL_BINOP (op_el_mul, 
octave_float_complex, octave_float_scalar, el_mul); + INSTALL_BINOP (op_el_div, octave_float_complex, octave_float_scalar, el_div); + INSTALL_BINOP (op_el_pow, octave_float_complex, octave_float_scalar, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_complex, octave_float_scalar, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_complex, octave_float_scalar, el_and); + INSTALL_BINOP (op_el_or, octave_float_complex, octave_float_scalar, el_or); + + INSTALL_CATOP (octave_float_complex, octave_float_scalar, fcs_fs); + + INSTALL_ASSIGNCONV (octave_float_complex, octave_float_scalar, + octave_float_complex_matrix); + INSTALL_ASSIGNCONV (octave_complex, octave_float_scalar, + octave_complex_matrix); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fm-fcm.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fm-fcm.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,173 @@ +/* + +Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "mx-fm-fcm.h" +#include "mx-fcm-fm.h" +#include "mx-fnda-fcnda.h" +#include "mx-fcnda-fnda.h" + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-re-mat.h" +#include "ov-flt-re-mat.h" +#include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// matrix by complex matrix ops. + +DEFNDBINOP_OP (add, float_matrix, float_complex_matrix, float_array, + float_complex_array, +) +DEFNDBINOP_OP (sub, float_matrix, float_complex_matrix, float_array, + float_complex_array, -) + +DEFBINOP_OP (mul, float_matrix, float_complex_matrix, *) + +DEFBINOP (div, float_matrix, float_complex_matrix) +{ + CAST_BINOP_ARGS (const octave_float_matrix&, + const octave_float_complex_matrix&); + MatrixType typ = v2.matrix_type (); + + FloatComplexMatrix ret = xdiv (v1.float_matrix_value (), + v2.float_complex_matrix_value (), typ); + + v2.matrix_type (typ); + return ret; +} + +DEFBINOPX (pow, float_matrix, float_complex_matrix) +{ + error ("can't do A ^ B for A and B both matrices"); + return octave_value (); +} + +DEFBINOP (ldiv, float_matrix, float_complex_matrix) +{ + CAST_BINOP_ARGS (const octave_float_matrix&, + const octave_float_complex_matrix&); + MatrixType typ = v1.matrix_type (); + + FloatComplexMatrix ret = xleftdiv (v1.float_matrix_value (), + v2.float_complex_matrix_value (), typ); + + v1.matrix_type (typ); + return ret; +} + +DEFNDBINOP_FN (lt, float_matrix, float_complex_matrix, float_array, + float_complex_array, mx_el_lt) +DEFNDBINOP_FN (le, float_matrix, float_complex_matrix, float_array, + float_complex_array, mx_el_le) +DEFNDBINOP_FN (eq, float_matrix, float_complex_matrix, float_array, + float_complex_array, mx_el_eq) +DEFNDBINOP_FN (ge, float_matrix, float_complex_matrix, float_array, + float_complex_array, mx_el_ge) +DEFNDBINOP_FN (gt, float_matrix, float_complex_matrix, float_array, + float_complex_array, 
mx_el_gt) +DEFNDBINOP_FN (ne, float_matrix, float_complex_matrix, float_array, + float_complex_array, mx_el_ne) + +DEFNDBINOP_FN (el_mul, float_matrix, float_complex_matrix, float_array, + float_complex_array, product) +DEFNDBINOP_FN (el_div, float_matrix, float_complex_matrix, float_array, + float_complex_array, quotient) +DEFNDBINOP_FN (el_pow, float_matrix, float_complex_matrix, float_array, + float_complex_array, elem_xpow) + +DEFBINOP (el_ldiv, float_matrix, float_complex_matrix) +{ + CAST_BINOP_ARGS (const octave_float_matrix&, + const octave_float_complex_matrix&); + + return quotient (v2.float_complex_array_value (), v1.float_array_value ()); +} + +DEFNDBINOP_FN (el_and, float_matrix, float_complex_matrix, float_array, + float_complex_array, mx_el_and) +DEFNDBINOP_FN (el_or, float_matrix, float_complex_matrix, float_array, + float_complex_array, mx_el_or) + +DEFNDCATOP_FN (fm_fcm, float_matrix, float_complex_matrix, float_array, + float_complex_array, concat) + +DEFCONV (float_complex_matrix_conv, float_matrix, float_complex_matrix) +{ + CAST_CONV_ARG (const octave_float_matrix&); + + return new octave_float_complex_matrix (FloatComplexNDArray (v.float_array_value ())); +} + +void +install_fm_fcm_ops (void) +{ + INSTALL_BINOP (op_add, octave_float_matrix, octave_float_complex_matrix, add); + INSTALL_BINOP (op_sub, octave_float_matrix, octave_float_complex_matrix, sub); + INSTALL_BINOP (op_mul, octave_float_matrix, octave_float_complex_matrix, mul); + INSTALL_BINOP (op_div, octave_float_matrix, octave_float_complex_matrix, div); + INSTALL_BINOP (op_pow, octave_float_matrix, octave_float_complex_matrix, pow); + INSTALL_BINOP (op_ldiv, octave_float_matrix, + octave_float_complex_matrix, ldiv); + INSTALL_BINOP (op_lt, octave_float_matrix, octave_float_complex_matrix, lt); + INSTALL_BINOP (op_le, octave_float_matrix, octave_float_complex_matrix, le); + INSTALL_BINOP (op_eq, octave_float_matrix, octave_float_complex_matrix, eq); + INSTALL_BINOP (op_ge, 
octave_float_matrix, octave_float_complex_matrix, ge); + INSTALL_BINOP (op_gt, octave_float_matrix, octave_float_complex_matrix, gt); + INSTALL_BINOP (op_ne, octave_float_matrix, octave_float_complex_matrix, ne); + INSTALL_BINOP (op_el_mul, octave_float_matrix, + octave_float_complex_matrix, el_mul); + INSTALL_BINOP (op_el_div, octave_float_matrix, + octave_float_complex_matrix, el_div); + INSTALL_BINOP (op_el_pow, octave_float_matrix, + octave_float_complex_matrix, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_matrix, + octave_float_complex_matrix, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_matrix, + octave_float_complex_matrix, el_and); + INSTALL_BINOP (op_el_or, octave_float_matrix, + octave_float_complex_matrix, el_or); + + INSTALL_CATOP (octave_float_matrix, octave_float_complex_matrix, fm_fcm); + + INSTALL_ASSIGNCONV (octave_float_matrix, octave_float_complex_matrix, + octave_float_complex_matrix); + INSTALL_ASSIGNCONV (octave_matrix, octave_float_complex_matrix, + octave_complex_matrix); + + INSTALL_WIDENOP (octave_float_matrix, octave_float_complex_matrix, + float_complex_matrix_conv); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fm-fcs.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fm-fcs.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,160 @@ +/* + +Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "mx-fm-fcs.h" +#include "mx-fcs-fm.h" +#include "mx-fnda-fcs.h" +#include "mx-fcs-fnda.h" + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-re-mat.h" +#include "ov-flt-re-mat.h" +#include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" +#include "ov-flt-complex.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// matrix by complex scalar ops. + +DEFNDBINOP_OP (add, float_matrix, float_complex, float_array, float_complex, +) +DEFNDBINOP_OP (sub, float_matrix, float_complex, float_array, float_complex, -) +DEFNDBINOP_OP (mul, float_matrix, float_complex, float_array, float_complex, *) + +DEFBINOP (div, float_matrix, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_matrix&, const octave_float_complex&); + + FloatComplex d = v2.float_complex_value (); + + if (d == static_cast(0.0)) + gripe_divide_by_zero (); + + return octave_value (v1.float_array_value () / d); +} + +DEFBINOP_FN (pow, float_matrix, float_complex, xpow) + +DEFBINOP (ldiv, float_matrix, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_matrix&, const octave_float_complex&); + + FloatMatrix m1 = v1.float_matrix_value (); + FloatComplexMatrix m2 = v2.float_complex_matrix_value (); + MatrixType typ = v1.matrix_type (); + + FloatComplexMatrix ret = xleftdiv (m1, m2, typ); + + v1.matrix_type (typ); + return ret; +} + +DEFNDBINOP_FN (lt, float_matrix, float_complex, float_array, + float_complex, mx_el_lt) +DEFNDBINOP_FN (le, float_matrix, float_complex, float_array, + float_complex, mx_el_le) +DEFNDBINOP_FN (eq, float_matrix, float_complex, float_array, + float_complex, mx_el_eq) +DEFNDBINOP_FN (ge, float_matrix, float_complex, float_array, + float_complex, mx_el_ge) +DEFNDBINOP_FN (gt, float_matrix, float_complex, float_array, + float_complex, mx_el_gt) 
+DEFNDBINOP_FN (ne, float_matrix, float_complex, float_array, + float_complex, mx_el_ne) + +DEFNDBINOP_OP (el_mul, float_matrix, float_complex, float_array, + float_complex, *) + +DEFBINOP (el_div, float_matrix, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_matrix&, const octave_float_complex&); + + FloatComplex d = v2.float_complex_value (); + + if (d == static_cast<FloatComplex> (0.0)) + gripe_divide_by_zero (); + + return octave_value (v1.float_array_value () / d); +} + +DEFNDBINOP_FN (el_pow, float_matrix, float_complex, float_array, + float_complex, elem_xpow) + +DEFBINOP (el_ldiv, float_matrix, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_matrix&, const octave_float_complex&); + + return x_el_div (v2.float_complex_value (), v1.float_array_value ()); +} + +DEFNDBINOP_FN (el_and, float_matrix, float_complex, float_array, + float_complex, mx_el_and) +DEFNDBINOP_FN (el_or, float_matrix, float_complex, float_array, + float_complex, mx_el_or) + +DEFNDCATOP_FN (fm_fcs, float_matrix, float_complex, float_array, + float_complex_array, concat) + +void +install_fm_fcs_ops (void) +{ + INSTALL_BINOP (op_add, octave_float_matrix, octave_float_complex, add); + INSTALL_BINOP (op_sub, octave_float_matrix, octave_float_complex, sub); + INSTALL_BINOP (op_mul, octave_float_matrix, octave_float_complex, mul); + INSTALL_BINOP (op_div, octave_float_matrix, octave_float_complex, div); + INSTALL_BINOP (op_pow, octave_float_matrix, octave_float_complex, pow); + INSTALL_BINOP (op_ldiv, octave_float_matrix, octave_float_complex, ldiv); + INSTALL_BINOP (op_lt, octave_float_matrix, octave_float_complex, lt); + INSTALL_BINOP (op_le, octave_float_matrix, octave_float_complex, le); + INSTALL_BINOP (op_eq, octave_float_matrix, octave_float_complex, eq); + INSTALL_BINOP (op_ge, octave_float_matrix, octave_float_complex, ge); + INSTALL_BINOP (op_gt, octave_float_matrix, octave_float_complex, gt); + INSTALL_BINOP (op_ne, octave_float_matrix, octave_float_complex, ne); + INSTALL_BINOP
(op_el_mul, octave_float_matrix, octave_float_complex, el_mul); + INSTALL_BINOP (op_el_div, octave_float_matrix, octave_float_complex, el_div); + INSTALL_BINOP (op_el_pow, octave_float_matrix, octave_float_complex, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_matrix, octave_float_complex, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_matrix, octave_float_complex, el_and); + INSTALL_BINOP (op_el_or, octave_float_matrix, octave_float_complex, el_or); + + INSTALL_CATOP (octave_float_matrix, octave_float_complex, fm_fcs); + + INSTALL_ASSIGNCONV (octave_float_matrix, octave_float_complex, + octave_float_complex_matrix); + INSTALL_ASSIGNCONV (octave_matrix, octave_float_complex, + octave_complex_matrix); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fm-fm.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fm-fm.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,189 @@ +/* + +Copyright (C) 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-re-mat.h" +#include "ov-flt-re-mat.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// matrix unary ops. 
+ +DEFNDUNOP_OP (not, float_matrix, float_array, !) +DEFNDUNOP_OP (uplus, float_matrix, float_array, /* no-op */) +DEFNDUNOP_OP (uminus, float_matrix, float_array, -) + +DEFUNOP (transpose, float_matrix) +{ + CAST_UNOP_ARG (const octave_float_matrix&); + + if (v.ndims () > 2) + { + error ("transpose not defined for N-d objects"); + return octave_value (); + } + else + return octave_value (v.float_matrix_value().transpose ()); +} + +DEFNCUNOP_METHOD (incr, float_matrix, increment) +DEFNCUNOP_METHOD (decr, float_matrix, decrement) + +// matrix by matrix ops. + +DEFNDBINOP_OP (add, float_matrix, float_matrix, float_array, float_array, +) +DEFNDBINOP_OP (sub, float_matrix, float_matrix, float_array, float_array, -) + +DEFBINOP_OP (mul, float_matrix, float_matrix, *) + +DEFBINOP (div, float_matrix, float_matrix) +{ + CAST_BINOP_ARGS (const octave_float_matrix&, const octave_float_matrix&); + MatrixType typ = v2.matrix_type (); + + FloatMatrix ret = xdiv (v1.float_matrix_value (), + v2.float_matrix_value (), typ); + + v2.matrix_type (typ); + return ret; +} + +DEFBINOPX (pow, float_matrix, float_matrix) +{ + error ("can't do A ^ B for A and B both matrices"); + return octave_value (); +} + +DEFBINOP (ldiv, float_matrix, float_matrix) +{ + CAST_BINOP_ARGS (const octave_float_matrix&, const octave_float_matrix&); + MatrixType typ = v1.matrix_type (); + + FloatMatrix ret = xleftdiv (v1.float_matrix_value (), + v2.float_matrix_value (), typ); + + v1.matrix_type (typ); + return ret; +} + +DEFNDBINOP_FN (lt, float_matrix, float_matrix, float_array, + float_array, mx_el_lt) +DEFNDBINOP_FN (le, float_matrix, float_matrix, float_array, + float_array, mx_el_le) +DEFNDBINOP_FN (eq, float_matrix, float_matrix, float_array, + float_array, mx_el_eq) +DEFNDBINOP_FN (ge, float_matrix, float_matrix, float_array, + float_array, mx_el_ge) +DEFNDBINOP_FN (gt, float_matrix, float_matrix, float_array, + float_array, mx_el_gt) +DEFNDBINOP_FN (ne, float_matrix, float_matrix, float_array, + 
float_array, mx_el_ne) + +DEFNDBINOP_FN (el_mul, float_matrix, float_matrix, float_array, + float_array, product) +DEFNDBINOP_FN (el_div, float_matrix, float_matrix, float_array, + float_array, quotient) +DEFNDBINOP_FN (el_pow, float_matrix, float_matrix, float_array, + float_array, elem_xpow) + +DEFBINOP (el_ldiv, float_matrix, float_matrix) +{ + CAST_BINOP_ARGS (const octave_float_matrix&, const octave_float_matrix&); + + return octave_value (quotient (v2.float_array_value (), + v1.float_array_value ())); +} + +DEFNDBINOP_FN (el_and, float_matrix, float_matrix, float_array, + float_array, mx_el_and) +DEFNDBINOP_FN (el_or, float_matrix, float_matrix, float_array, + float_array, mx_el_or) + +DEFNDCATOP_FN (fm_fm, float_matrix, float_matrix, float_array, + float_array, concat) + +DEFNDASSIGNOP_FN (assign, float_matrix, float_matrix, float_array, assign) + +DEFNDASSIGNOP_FN (dbl_assign, matrix, float_matrix, array, assign) + +CONVDECL (float_matrix_to_matrix) +{ + CAST_CONV_ARG (const octave_float_matrix&); + + return new octave_matrix (v.array_value ()); +} + +void +install_fm_fm_ops (void) +{ + INSTALL_UNOP (op_not, octave_float_matrix, not); + INSTALL_UNOP (op_uplus, octave_float_matrix, uplus); + INSTALL_UNOP (op_uminus, octave_float_matrix, uminus); + INSTALL_UNOP (op_transpose, octave_float_matrix, transpose); + INSTALL_UNOP (op_hermitian, octave_float_matrix, transpose); + + INSTALL_NCUNOP (op_incr, octave_float_matrix, incr); + INSTALL_NCUNOP (op_decr, octave_float_matrix, decr); + + INSTALL_BINOP (op_add, octave_float_matrix, octave_float_matrix, add); + INSTALL_BINOP (op_sub, octave_float_matrix, octave_float_matrix, sub); + INSTALL_BINOP (op_mul, octave_float_matrix, octave_float_matrix, mul); + INSTALL_BINOP (op_div, octave_float_matrix, octave_float_matrix, div); + INSTALL_BINOP (op_pow, octave_float_matrix, octave_float_matrix, pow); + INSTALL_BINOP (op_ldiv, octave_float_matrix, octave_float_matrix, ldiv); + INSTALL_BINOP (op_lt, octave_float_matrix, 
octave_float_matrix, lt); + INSTALL_BINOP (op_le, octave_float_matrix, octave_float_matrix, le); + INSTALL_BINOP (op_eq, octave_float_matrix, octave_float_matrix, eq); + INSTALL_BINOP (op_ge, octave_float_matrix, octave_float_matrix, ge); + INSTALL_BINOP (op_gt, octave_float_matrix, octave_float_matrix, gt); + INSTALL_BINOP (op_ne, octave_float_matrix, octave_float_matrix, ne); + INSTALL_BINOP (op_el_mul, octave_float_matrix, octave_float_matrix, el_mul); + INSTALL_BINOP (op_el_div, octave_float_matrix, octave_float_matrix, el_div); + INSTALL_BINOP (op_el_pow, octave_float_matrix, octave_float_matrix, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_matrix, octave_float_matrix, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_matrix, octave_float_matrix, el_and); + INSTALL_BINOP (op_el_or, octave_float_matrix, octave_float_matrix, el_or); + + INSTALL_CATOP (octave_float_matrix, octave_float_matrix, fm_fm); + + INSTALL_ASSIGNOP (op_asn_eq, octave_float_matrix, + octave_float_matrix, assign); + INSTALL_ASSIGNOP (op_asn_eq, octave_matrix, + octave_float_matrix, dbl_assign); + + INSTALL_CONVOP (octave_float_matrix, octave_matrix, float_matrix_to_matrix); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fm-fs.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fm-fs.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,152 @@ +/* + +Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-flt-re-mat.h" +#include "ov-float.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// matrix by scalar ops. + +DEFNDBINOP_OP (add, float_matrix, float_scalar, float_array, float_scalar, +) +DEFNDBINOP_OP (sub, float_matrix, float_scalar, float_array, float_scalar, -) +DEFNDBINOP_OP (mul, float_matrix, float_scalar, float_array, float_scalar, *) + +DEFBINOP (div, float_matrix, float) +{ + CAST_BINOP_ARGS (const octave_float_matrix&, const octave_float_scalar&); + + float d = v2.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v1.float_array_value () / d); +} + +DEFBINOP_FN (pow, float_matrix, float_scalar, xpow) + +DEFBINOP (ldiv, float_matrix, float) +{ + CAST_BINOP_ARGS (const octave_float_matrix&, const octave_float_scalar&); + + FloatMatrix m1 = v1.float_matrix_value (); + FloatMatrix m2 = v2.float_matrix_value (); + MatrixType typ = v1.matrix_type (); + + FloatMatrix ret = xleftdiv (m1, m2, typ); + + v1.matrix_type (typ); + return ret; +} + +DEFNDBINOP_FN (lt, float_matrix, float_scalar, float_array, + float_scalar, mx_el_lt) +DEFNDBINOP_FN (le, float_matrix, float_scalar, float_array, + float_scalar, mx_el_le) +DEFNDBINOP_FN (eq, float_matrix, float_scalar, float_array, + float_scalar, mx_el_eq) +DEFNDBINOP_FN (ge, float_matrix, float_scalar, float_array, + float_scalar, mx_el_ge) +DEFNDBINOP_FN (gt, float_matrix, float_scalar, float_array, + float_scalar, mx_el_gt) +DEFNDBINOP_FN (ne, float_matrix, float_scalar, float_array, + float_scalar, mx_el_ne) + +DEFNDBINOP_OP (el_mul, float_matrix, float_scalar, float_array, float_scalar, *) + +DEFBINOP (el_div, float_matrix, float) +{ + 
CAST_BINOP_ARGS (const octave_float_matrix&, const octave_float_scalar&); + + float d = v2.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v1.float_array_value () / d); +} + +DEFNDBINOP_FN (el_pow, float_matrix, float_scalar, float_array, + float_scalar, elem_xpow) + +DEFBINOP (el_ldiv, float_matrix, float) +{ + CAST_BINOP_ARGS (const octave_float_matrix&, const octave_float_scalar&); + + return x_el_div (v2.float_value (), v1.float_array_value ()); +} + +DEFNDBINOP_FN (el_and, float_matrix, float_scalar, float_array, + float_scalar, mx_el_and) +DEFNDBINOP_FN (el_or, float_matrix, float_scalar, float_array, + float_scalar, mx_el_or) + +DEFNDCATOP_FN (fm_fs, float_matrix, float_scalar, float_array, + float_array, concat) + +DEFNDASSIGNOP_FN (assign, float_matrix, float_scalar, float_array, assign) +DEFNDASSIGNOP_FN (dbl_assign, matrix, float_scalar, array, assign) + +void +install_fm_fs_ops (void) +{ + INSTALL_BINOP (op_add, octave_float_matrix, octave_float_scalar, add); + INSTALL_BINOP (op_sub, octave_float_matrix, octave_float_scalar, sub); + INSTALL_BINOP (op_mul, octave_float_matrix, octave_float_scalar, mul); + INSTALL_BINOP (op_div, octave_float_matrix, octave_float_scalar, div); + INSTALL_BINOP (op_pow, octave_float_matrix, octave_float_scalar, pow); + INSTALL_BINOP (op_ldiv, octave_float_matrix, octave_float_scalar, ldiv); + INSTALL_BINOP (op_lt, octave_float_matrix, octave_float_scalar, lt); + INSTALL_BINOP (op_le, octave_float_matrix, octave_float_scalar, le); + INSTALL_BINOP (op_eq, octave_float_matrix, octave_float_scalar, eq); + INSTALL_BINOP (op_ge, octave_float_matrix, octave_float_scalar, ge); + INSTALL_BINOP (op_gt, octave_float_matrix, octave_float_scalar, gt); + INSTALL_BINOP (op_ne, octave_float_matrix, octave_float_scalar, ne); + INSTALL_BINOP (op_el_mul, octave_float_matrix, octave_float_scalar, el_mul); + INSTALL_BINOP (op_el_div, octave_float_matrix, octave_float_scalar, el_div); + INSTALL_BINOP 
(op_el_pow, octave_float_matrix, octave_float_scalar, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_matrix, octave_float_scalar, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_matrix, octave_float_scalar, el_and); + INSTALL_BINOP (op_el_or, octave_float_matrix, octave_float_scalar, el_or); + + INSTALL_CATOP (octave_float_matrix, octave_float_scalar, fm_fs); + + INSTALL_ASSIGNOP (op_asn_eq, octave_float_matrix, octave_float_scalar, assign); + INSTALL_ASSIGNOP (op_asn_eq, octave_matrix, octave_float_scalar, dbl_assign); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fs-fcm.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fs-fcm.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,177 @@ +/* + +Copyright (C) 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "mx-fs-fcm.h" +#include "mx-fcm-fs.h" +#include "mx-fs-fcnda.h" +#include "mx-fcnda-fs.h" + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-scalar.h" +#include "ov-float.h" +#include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" +#include "ov-flt-re-mat.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// scalar by complex matrix ops. 
+ +DEFNDBINOP_OP (add, float_scalar, float_complex_matrix, float_scalar, + float_complex_array, +) +DEFNDBINOP_OP (sub, float_scalar, float_complex_matrix, float_scalar, + float_complex_array, -) +DEFNDBINOP_OP (mul, float_scalar, float_complex_matrix, float_scalar, + float_complex_array, *) + +DEFBINOP (div, float_scalar, float_complex_matrix) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, + const octave_float_complex_matrix&); + + FloatMatrix m1 = v1.float_matrix_value (); + FloatComplexMatrix m2 = v2.float_complex_matrix_value (); + MatrixType typ = v2.matrix_type (); + + FloatComplexMatrix ret = xdiv (m1, m2, typ); + + v2.matrix_type (typ); + return ret; +} + +DEFBINOP_FN (pow, float_scalar, float_complex_matrix, xpow) + +DEFBINOP (ldiv, float_scalar, float_complex_matrix) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, + const octave_float_complex_matrix&); + + float d = v1.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v2.float_complex_array_value () / d); +} + +DEFNDBINOP_FN (lt, float_scalar, float_complex_matrix, float_scalar, + float_complex_array, mx_el_lt) +DEFNDBINOP_FN (le, float_scalar, float_complex_matrix, float_scalar, + float_complex_array, mx_el_le) +DEFNDBINOP_FN (eq, float_scalar, float_complex_matrix, float_scalar, + float_complex_array, mx_el_eq) +DEFNDBINOP_FN (ge, float_scalar, float_complex_matrix, float_scalar, + float_complex_array, mx_el_ge) +DEFNDBINOP_FN (gt, float_scalar, float_complex_matrix, float_scalar, + float_complex_array, mx_el_gt) +DEFNDBINOP_FN (ne, float_scalar, float_complex_matrix, float_scalar, + float_complex_array, mx_el_ne) + +DEFNDBINOP_OP (el_mul, float_scalar, float_complex_matrix, float_scalar, + float_complex_array, *) +DEFNDBINOP_FN (el_div, float_scalar, float_complex_matrix, float_scalar, + float_complex_array, x_el_div) +DEFNDBINOP_FN (el_pow, float_scalar, float_complex_matrix, float_scalar, + float_complex_array, elem_xpow) + +DEFBINOP (el_ldiv, float_scalar, 
float_complex_matrix) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, + const octave_float_complex_matrix&); + + float d = v1.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v2.float_complex_array_value () / d); +} + +DEFNDBINOP_FN (el_and, float_scalar, float_complex_matrix, float_scalar, + float_complex_array, mx_el_and) +DEFNDBINOP_FN (el_or, float_scalar, float_complex_matrix, float_scalar, + float_complex_array, mx_el_or) + +DEFNDCATOP_FN (fs_fcm, float_scalar, float_complex_matrix, float_array, + float_complex_array, concat) + +DEFCONV (float_complex_matrix_conv, float_scalar, float_complex_matrix) +{ + CAST_CONV_ARG (const octave_float_scalar&); + + return new octave_float_complex_matrix (FloatComplexMatrix (v.float_matrix_value ())); +} + +void +install_fs_fcm_ops (void) +{ + INSTALL_BINOP (op_add, octave_float_scalar, octave_float_complex_matrix, add); + INSTALL_BINOP (op_sub, octave_float_scalar, octave_float_complex_matrix, sub); + INSTALL_BINOP (op_mul, octave_float_scalar, octave_float_complex_matrix, mul); + INSTALL_BINOP (op_div, octave_float_scalar, octave_float_complex_matrix, div); + INSTALL_BINOP (op_pow, octave_float_scalar, octave_float_complex_matrix, pow); + INSTALL_BINOP (op_ldiv, octave_float_scalar, + octave_float_complex_matrix, ldiv); + INSTALL_BINOP (op_lt, octave_float_scalar, octave_float_complex_matrix, lt); + INSTALL_BINOP (op_le, octave_float_scalar, octave_float_complex_matrix, le); + INSTALL_BINOP (op_eq, octave_float_scalar, octave_float_complex_matrix, eq); + INSTALL_BINOP (op_ge, octave_float_scalar, octave_float_complex_matrix, ge); + INSTALL_BINOP (op_gt, octave_float_scalar, octave_float_complex_matrix, gt); + INSTALL_BINOP (op_ne, octave_float_scalar, octave_float_complex_matrix, ne); + INSTALL_BINOP (op_el_mul, octave_float_scalar, + octave_float_complex_matrix, el_mul); + INSTALL_BINOP (op_el_div, octave_float_scalar, + octave_float_complex_matrix, el_div); + INSTALL_BINOP 
(op_el_pow, octave_float_scalar, + octave_float_complex_matrix, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_scalar, + octave_float_complex_matrix, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_scalar, + octave_float_complex_matrix, el_and); + INSTALL_BINOP (op_el_or, octave_float_scalar, + octave_float_complex_matrix, el_or); + + INSTALL_CATOP (octave_float_scalar, octave_float_complex_matrix, fs_fcm); + + INSTALL_ASSIGNCONV (octave_float_scalar, octave_float_complex_matrix, + octave_float_complex_matrix); + INSTALL_ASSIGNCONV (octave_scalar, octave_float_complex_matrix, + octave_complex_matrix); + + INSTALL_WIDENOP (octave_float_scalar, octave_float_complex_matrix, + float_complex_matrix_conv); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fs-fcs.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fs-fcs.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,194 @@ +/* + +Copyright (C) 1996, 1997, 2000, 2002, 2003, 2004, 2005, 2007 + John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
 + +*/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-scalar.h" +#include "ov-float.h" +#include "ov-flt-complex.h" +#include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// scalar by complex scalar ops. + +DEFBINOP_OP (add, float_scalar, float_complex, +) +DEFBINOP_OP (sub, float_scalar, float_complex, -) +DEFBINOP_OP (mul, float_scalar, float_complex, *) + +DEFBINOP (div, float_scalar, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_complex&); + + FloatComplex d = v2.float_complex_value (); + + if (d == static_cast<float>(0.0)) + gripe_divide_by_zero (); + + return octave_value (v1.float_value () / d); +} + +DEFBINOP_FN (pow, float_scalar, float_complex, xpow) + +DEFBINOP (ldiv, float_scalar, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_complex&); + + float d = v1.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v2.float_complex_value () / d); +} + +DEFBINOP (lt, float_scalar, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_complex&); + + return v1.float_value () < real (v2.float_complex_value ()); +} + +DEFBINOP (le, float_scalar, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_complex&); + + return v1.float_value () <= real (v2.float_complex_value ()); +} + +DEFBINOP (eq, float_scalar, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_complex&); + + return v1.float_value () == v2.float_complex_value (); +} + +DEFBINOP (ge, float_scalar, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_complex&); + + return v1.float_value () >= real (v2.float_complex_value ()); +} + +DEFBINOP (gt, float_scalar, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const 
octave_float_complex&); + + return v1.float_value () > real (v2.float_complex_value ()); +} + +DEFBINOP (ne, float_scalar, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_complex&); + + return v1.float_value () != v2.float_complex_value (); +} + +DEFBINOP_OP (el_mul, float_scalar, float_complex, *) + +DEFBINOP (el_div, float_scalar, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_complex&); + + FloatComplex d = v2.float_complex_value (); + + if (d == static_cast<float>(0.0)) + gripe_divide_by_zero (); + + return octave_value (v1.float_value () / d); +} + +DEFBINOP_FN (el_pow, float_scalar, float_complex, xpow) + +DEFBINOP (el_ldiv, float_scalar, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_complex&); + + float d = v1.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v2.float_complex_value () / d); +} + +DEFBINOP (el_and, float_scalar, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_complex&); + + return octave_value (v1.float_scalar_value () && (v2.float_complex_value () != static_cast<float>(0.0))); +} + +DEFBINOP (el_or, float_scalar, float_complex) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_complex&); + + return octave_value (v1.float_scalar_value () || (v2.float_complex_value () != static_cast<float>(0.0))); +} + +DEFNDCATOP_FN (fs_fcs, float_scalar, float_complex, float_array, + float_complex_array, concat) + +void +install_fs_fcs_ops (void) +{ + INSTALL_BINOP (op_add, octave_float_scalar, octave_float_complex, add); + INSTALL_BINOP (op_sub, octave_float_scalar, octave_float_complex, sub); + INSTALL_BINOP (op_mul, octave_float_scalar, octave_float_complex, mul); + INSTALL_BINOP (op_div, octave_float_scalar, octave_float_complex, div); + INSTALL_BINOP (op_pow, octave_float_scalar, octave_float_complex, pow); + INSTALL_BINOP (op_ldiv, octave_float_scalar, octave_float_complex, 
ldiv); + INSTALL_BINOP (op_lt, octave_float_scalar, octave_float_complex, lt); + INSTALL_BINOP (op_le, octave_float_scalar, octave_float_complex, le); + INSTALL_BINOP (op_eq, octave_float_scalar, octave_float_complex, eq); + INSTALL_BINOP (op_ge, octave_float_scalar, octave_float_complex, ge); + INSTALL_BINOP (op_gt, octave_float_scalar, octave_float_complex, gt); + INSTALL_BINOP (op_ne, octave_float_scalar, octave_float_complex, ne); + INSTALL_BINOP (op_el_mul, octave_float_scalar, octave_float_complex, el_mul); + INSTALL_BINOP (op_el_div, octave_float_scalar, octave_float_complex, el_div); + INSTALL_BINOP (op_el_pow, octave_float_scalar, octave_float_complex, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_scalar, octave_float_complex, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_scalar, octave_float_complex, el_and); + INSTALL_BINOP (op_el_or, octave_float_scalar, octave_float_complex, el_or); + + INSTALL_CATOP (octave_float_scalar, octave_float_complex, fs_fcs); + + INSTALL_ASSIGNCONV (octave_float_scalar, octave_float_complex, + octave_float_complex_matrix); + INSTALL_ASSIGNCONV (octave_scalar, octave_float_complex, + octave_complex_matrix); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fs-fm.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fs-fm.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,155 @@ +/* + +Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-scalar.h" +#include "ov-float.h" +#include "ov-re-mat.h" +#include "ov-flt-re-mat.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// scalar by matrix ops. + +DEFNDBINOP_OP (add, float_scalar, float_matrix, float_scalar, float_array, +) +DEFNDBINOP_OP (sub, float_scalar, float_matrix, float_scalar, float_array, -) +DEFNDBINOP_OP (mul, float_scalar, float_matrix, float_scalar, float_array, *) + +DEFBINOP (div, float_scalar, float_matrix) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_matrix&); + + FloatMatrix m1 = v1.float_matrix_value (); + FloatMatrix m2 = v2.float_matrix_value (); + MatrixType typ = v2.matrix_type (); + + FloatMatrix ret = xdiv (m1, m2, typ); + + v2.matrix_type (typ); + return ret; +} + +DEFBINOP_FN (pow, float_scalar, float_matrix, xpow) + +DEFBINOP (ldiv, float_scalar, float_matrix) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_matrix&); + + float d = v1.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v2.float_array_value () / d); +} + +DEFNDBINOP_FN (lt, float_scalar, float_matrix, float_scalar, + float_array, mx_el_lt) +DEFNDBINOP_FN (le, float_scalar, float_matrix, float_scalar, + float_array, mx_el_le) +DEFNDBINOP_FN (eq, float_scalar, float_matrix, float_scalar, + float_array, mx_el_eq) +DEFNDBINOP_FN (ge, float_scalar, float_matrix, float_scalar, + float_array, mx_el_ge) +DEFNDBINOP_FN (gt, float_scalar, float_matrix, float_scalar, +float_array, mx_el_gt) +DEFNDBINOP_FN (ne, float_scalar, float_matrix, float_scalar, + float_array, mx_el_ne) + +DEFNDBINOP_OP (el_mul, float_scalar, float_matrix, float_scalar, + 
float_array, *) +DEFNDBINOP_FN (el_div, float_scalar, float_matrix, float_scalar, + float_array, x_el_div) +DEFNDBINOP_FN (el_pow, float_scalar, float_matrix, float_scalar, + float_array, elem_xpow) + +DEFBINOP (el_ldiv, float_scalar, float_matrix) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_matrix&); + + float d = v1.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v2.float_array_value () / d); +} + +DEFNDBINOP_FN (el_and, float_scalar, float_matrix, float_scalar, + float_array, mx_el_and) +DEFNDBINOP_FN (el_or, float_scalar, float_matrix, float_scalar, + float_array, mx_el_or) + +DEFNDCATOP_FN (fs_fm, float_scalar, float_matrix, float_array, + float_array, concat) + +DEFCONV (matrix_conv, float_scalar, float_matrix) +{ + CAST_CONV_ARG (const octave_float_scalar&); + + return new octave_float_matrix (v.float_matrix_value ()); +} + +void +install_fs_fm_ops (void) +{ + INSTALL_BINOP (op_add, octave_float_scalar, octave_float_matrix, add); + INSTALL_BINOP (op_sub, octave_float_scalar, octave_float_matrix, sub); + INSTALL_BINOP (op_mul, octave_float_scalar, octave_float_matrix, mul); + INSTALL_BINOP (op_div, octave_float_scalar, octave_float_matrix, div); + INSTALL_BINOP (op_pow, octave_float_scalar, octave_float_matrix, pow); + INSTALL_BINOP (op_ldiv, octave_float_scalar, octave_float_matrix, ldiv); + INSTALL_BINOP (op_lt, octave_float_scalar, octave_float_matrix, lt); + INSTALL_BINOP (op_le, octave_float_scalar, octave_float_matrix, le); + INSTALL_BINOP (op_eq, octave_float_scalar, octave_float_matrix, eq); + INSTALL_BINOP (op_ge, octave_float_scalar, octave_float_matrix, ge); + INSTALL_BINOP (op_gt, octave_float_scalar, octave_float_matrix, gt); + INSTALL_BINOP (op_ne, octave_float_scalar, octave_float_matrix, ne); + INSTALL_BINOP (op_el_mul, octave_float_scalar, octave_float_matrix, el_mul); + INSTALL_BINOP (op_el_div, octave_float_scalar, octave_float_matrix, el_div); + INSTALL_BINOP (op_el_pow, 
octave_float_scalar, octave_float_matrix, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_scalar, octave_float_matrix, el_ldiv); + INSTALL_BINOP (op_el_and, octave_float_scalar, octave_float_matrix, el_and); + INSTALL_BINOP (op_el_or, octave_float_scalar, octave_float_matrix, el_or); + + INSTALL_CATOP (octave_float_scalar, octave_float_matrix, fs_fm); + + INSTALL_ASSIGNCONV (octave_float_scalar, octave_float_matrix, octave_float_matrix); + INSTALL_ASSIGNCONV (octave_scalar, octave_float_matrix, octave_matrix); + + INSTALL_WIDENOP (octave_float_scalar, octave_float_matrix, matrix_conv); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-fs-fs.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/OPERATORS/op-fs-fs.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,172 @@ +/* + +Copyright (C) 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2005, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" +#include "ov-scalar.h" +#include "ov-float.h" +#include "ov-flt-re-mat.h" +#include "ov-typeinfo.h" +#include "ops.h" +#include "xdiv.h" +#include "xpow.h" + +// scalar unary ops. + +DEFUNOP_OP (not, float_scalar, !) 
+DEFUNOP_OP (uplus, float_scalar, /* no-op */) +DEFUNOP_OP (uminus, float_scalar, -) +DEFUNOP_OP (transpose, float_scalar, /* no-op */) +DEFUNOP_OP (hermitian, float_scalar, /* no-op */) + +DEFNCUNOP_METHOD (incr, float_scalar, increment) +DEFNCUNOP_METHOD (decr, float_scalar, decrement) + +// float by float ops. + +DEFBINOP_OP (add, float_scalar, float_scalar, +) +DEFBINOP_OP (sub, float_scalar, float_scalar, -) +DEFBINOP_OP (mul, float_scalar, float_scalar, *) + +DEFBINOP (div, float_scalar, float_scalar) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_scalar&); + + float d = v2.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v1.float_value () / d); +} + +DEFBINOP_FN (pow, float_scalar, float_scalar, xpow) + +DEFBINOP (ldiv, float_scalar, float_scalar) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_scalar&); + + float d = v1.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v2.float_value () / d); +} + +DEFBINOP_OP (lt, float_scalar, float_scalar, <) +DEFBINOP_OP (le, float_scalar, float_scalar, <=) +DEFBINOP_OP (eq, float_scalar, float_scalar, ==) +DEFBINOP_OP (ge, float_scalar, float_scalar, >=) +DEFBINOP_OP (gt, float_scalar, float_scalar, >) +DEFBINOP_OP (ne, float_scalar, float_scalar, !=) + +DEFBINOP_OP (el_mul, float_scalar, float_scalar, *) + +DEFBINOP (el_div, float_scalar, float_scalar) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_scalar&); + + float d = v2.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v1.float_value () / d); +} + +DEFBINOP_FN (el_pow, float_scalar, float_scalar, xpow) + +DEFBINOP (el_ldiv, float_scalar, float_scalar) +{ + CAST_BINOP_ARGS (const octave_float_scalar&, const octave_float_scalar&); + + float d = v1.float_value (); + + if (d == 0.0) + gripe_divide_by_zero (); + + return octave_value (v2.float_value () / d); +} + +DEFBINOP_OP (el_and, 
float_scalar, float_scalar, &&) +DEFBINOP_OP (el_or, float_scalar, float_scalar, ||) + +DEFNDCATOP_FN (fs_fs, float_scalar, float_scalar, float_array, float_array, concat) + +CONVDECL (float_to_scalar) +{ + CAST_CONV_ARG (const octave_float_scalar&); + + return new octave_matrix (Matrix (1, 1, static_cast<double>(v.float_value ()))); +} + +void +install_fs_fs_ops (void) +{ + INSTALL_UNOP (op_not, octave_float_scalar, not); + INSTALL_UNOP (op_uplus, octave_float_scalar, uplus); + INSTALL_UNOP (op_uminus, octave_float_scalar, uminus); + INSTALL_UNOP (op_transpose, octave_float_scalar, transpose); + INSTALL_UNOP (op_hermitian, octave_float_scalar, hermitian); + + INSTALL_NCUNOP (op_incr, octave_float_scalar, incr); + INSTALL_NCUNOP (op_decr, octave_float_scalar, decr); + + INSTALL_BINOP (op_add, octave_float_scalar, octave_float_scalar, add); + INSTALL_BINOP (op_sub, octave_float_scalar, octave_float_scalar, sub); + INSTALL_BINOP (op_mul, octave_float_scalar, octave_float_scalar, mul); + INSTALL_BINOP (op_div, octave_float_scalar, octave_float_scalar, div); + INSTALL_BINOP (op_pow, octave_float_scalar, octave_float_scalar, pow); + INSTALL_BINOP (op_ldiv, octave_float_scalar, octave_float_scalar, ldiv); + INSTALL_BINOP (op_lt, octave_float_scalar, octave_float_scalar, lt); + INSTALL_BINOP (op_le, octave_float_scalar, octave_float_scalar, le); + INSTALL_BINOP (op_eq, octave_float_scalar, octave_float_scalar, eq); + INSTALL_BINOP (op_ge, octave_float_scalar, octave_float_scalar, ge); + INSTALL_BINOP (op_gt, octave_float_scalar, octave_float_scalar, gt); + INSTALL_BINOP (op_ne, octave_float_scalar, octave_float_scalar, ne); + INSTALL_BINOP (op_el_mul, octave_float_scalar, octave_float_scalar, el_mul); + INSTALL_BINOP (op_el_div, octave_float_scalar, octave_float_scalar, el_div); + INSTALL_BINOP (op_el_pow, octave_float_scalar, octave_float_scalar, el_pow); + INSTALL_BINOP (op_el_ldiv, octave_float_scalar, octave_float_scalar, el_ldiv); + INSTALL_BINOP (op_el_and, 
octave_float_scalar, octave_float_scalar, el_and); + INSTALL_BINOP (op_el_or, octave_float_scalar, octave_float_scalar, el_or); + + INSTALL_CATOP (octave_float_scalar, octave_float_scalar, fs_fs); + + INSTALL_ASSIGNCONV (octave_float_scalar, octave_float_scalar, octave_float_matrix); + INSTALL_ASSIGNCONV (octave_scalar, octave_float_scalar, octave_matrix); + + INSTALL_CONVOP (octave_float_scalar, octave_matrix, float_to_scalar); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-i16-i16.cc --- a/src/OPERATORS/op-i16-i16.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-i16-i16.cc Sun Apr 27 22:34:17 2008 +0200 @@ -69,9 +69,13 @@ #include "ov-uint64.h" #include "ov-uint8.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-complex.h" +#include "ov-flt-complex.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-i32-i32.cc --- a/src/OPERATORS/op-i32-i32.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-i32-i32.cc Sun Apr 27 22:34:17 2008 +0200 @@ -69,9 +69,13 @@ #include "ov-uint64.h" #include "ov-uint8.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-complex.h" +#include "ov-flt-complex.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-i64-i64.cc --- a/src/OPERATORS/op-i64-i64.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-i64-i64.cc Sun Apr 27 22:34:17 2008 +0200 @@ -69,9 +69,13 @@ #include "ov-uint64.h" #include "ov-uint8.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-complex.h" +#include "ov-flt-complex.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-cx-mat.h" +#include 
"ov-flt-cx-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-i8-i8.cc --- a/src/OPERATORS/op-i8-i8.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-i8-i8.cc Sun Apr 27 22:34:17 2008 +0200 @@ -69,9 +69,13 @@ #include "ov-uint64.h" #include "ov-uint8.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-complex.h" +#include "ov-flt-complex.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-int-concat.cc --- a/src/OPERATORS/op-int-concat.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-int-concat.cc Sun Apr 27 22:34:17 2008 +0200 @@ -39,7 +39,9 @@ #include "ov-bool.h" #include "ov-bool-mat.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-str-mat.h" #include "ov-typeinfo.h" #include "op-int.h" @@ -139,6 +141,26 @@ OCTAVE_DOUBLE_INT_CONCAT_FN (uint32) OCTAVE_DOUBLE_INT_CONCAT_FN (uint64) +OCTAVE_INT_FLOAT_CONCAT_FN (int8) +OCTAVE_INT_FLOAT_CONCAT_FN (int16) +OCTAVE_INT_FLOAT_CONCAT_FN (int32) +OCTAVE_INT_FLOAT_CONCAT_FN (int64) + +OCTAVE_INT_FLOAT_CONCAT_FN (uint8) +OCTAVE_INT_FLOAT_CONCAT_FN (uint16) +OCTAVE_INT_FLOAT_CONCAT_FN (uint32) +OCTAVE_INT_FLOAT_CONCAT_FN (uint64) + +OCTAVE_FLOAT_INT_CONCAT_FN (int8) +OCTAVE_FLOAT_INT_CONCAT_FN (int16) +OCTAVE_FLOAT_INT_CONCAT_FN (int32) +OCTAVE_FLOAT_INT_CONCAT_FN (int64) + +OCTAVE_FLOAT_INT_CONCAT_FN (uint8) +OCTAVE_FLOAT_INT_CONCAT_FN (uint16) +OCTAVE_FLOAT_INT_CONCAT_FN (uint32) +OCTAVE_FLOAT_INT_CONCAT_FN (uint64) + OCTAVE_INT_CHAR_CONCAT_FN (int8) OCTAVE_INT_CHAR_CONCAT_FN (int16) OCTAVE_INT_CHAR_CONCAT_FN (int32) @@ -254,6 +276,26 @@ OCTAVE_INSTALL_DOUBLE_INT_CONCAT_FN (uint32); OCTAVE_INSTALL_DOUBLE_INT_CONCAT_FN (uint64); + OCTAVE_INSTALL_INT_FLOAT_CONCAT_FN (int8); + 
OCTAVE_INSTALL_INT_FLOAT_CONCAT_FN (int16); + OCTAVE_INSTALL_INT_FLOAT_CONCAT_FN (int32); + OCTAVE_INSTALL_INT_FLOAT_CONCAT_FN (int64); + + OCTAVE_INSTALL_INT_FLOAT_CONCAT_FN (uint8); + OCTAVE_INSTALL_INT_FLOAT_CONCAT_FN (uint16); + OCTAVE_INSTALL_INT_FLOAT_CONCAT_FN (uint32); + OCTAVE_INSTALL_INT_FLOAT_CONCAT_FN (uint64); + + OCTAVE_INSTALL_FLOAT_INT_CONCAT_FN (int8); + OCTAVE_INSTALL_FLOAT_INT_CONCAT_FN (int16); + OCTAVE_INSTALL_FLOAT_INT_CONCAT_FN (int32); + OCTAVE_INSTALL_FLOAT_INT_CONCAT_FN (int64); + + OCTAVE_INSTALL_FLOAT_INT_CONCAT_FN (uint8); + OCTAVE_INSTALL_FLOAT_INT_CONCAT_FN (uint16); + OCTAVE_INSTALL_FLOAT_INT_CONCAT_FN (uint32); + OCTAVE_INSTALL_FLOAT_INT_CONCAT_FN (uint64); + OCTAVE_INSTALL_INT_CHAR_CONCAT_FN (int8); OCTAVE_INSTALL_INT_CHAR_CONCAT_FN (int16); OCTAVE_INSTALL_INT_CHAR_CONCAT_FN (int32); diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-int-conv.cc --- a/src/OPERATORS/op-int-conv.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-int-conv.cc Sun Apr 27 22:34:17 2008 +0200 @@ -39,7 +39,9 @@ #include "ov-bool.h" #include "ov-bool-mat.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-str-mat.h" #include "ov-typeinfo.h" #include "ops.h" @@ -66,6 +68,26 @@ DEFCONVFN (matrix_to_uint32, matrix, uint32) DEFCONVFN (matrix_to_uint64, matrix, uint64) +DEFCONVFN (float_scalar_to_int8, float_scalar, int8) +DEFCONVFN (float_scalar_to_int16, float_scalar, int16) +DEFCONVFN (float_scalar_to_int32, float_scalar, int32) +DEFCONVFN (float_scalar_to_int64, float_scalar, int64) + +DEFCONVFN (float_scalar_to_uint8, float_scalar, uint8) +DEFCONVFN (float_scalar_to_uint16, float_scalar, uint16) +DEFCONVFN (float_scalar_to_uint32, float_scalar, uint32) +DEFCONVFN (float_scalar_to_uint64, float_scalar, uint64) + +DEFCONVFN (float_matrix_to_int8, float_matrix, int8) +DEFCONVFN (float_matrix_to_int16, float_matrix, int16) +DEFCONVFN (float_matrix_to_int32, float_matrix, int32) +DEFCONVFN 
(float_matrix_to_int64, float_matrix, int64) + +DEFCONVFN (float_matrix_to_uint8, float_matrix, uint8) +DEFCONVFN (float_matrix_to_uint16, float_matrix, uint16) +DEFCONVFN (float_matrix_to_uint32, float_matrix, uint32) +DEFCONVFN (float_matrix_to_uint64, float_matrix, uint64) + DEFCONVFN (bool_to_int8, bool, int8) DEFCONVFN (bool_to_int16, bool, int16) DEFCONVFN (bool_to_int32, bool, int32) @@ -184,6 +206,8 @@ { INSTALL_CONVOPS (scalar) INSTALL_CONVOPS (matrix) + INSTALL_CONVOPS (float_scalar) + INSTALL_CONVOPS (float_matrix) INSTALL_CONVOPS (bool) INSTALL_CONVOPS (bool_matrix) INSTALL_CONVOPS (range) diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-int.h --- a/src/OPERATORS/op-int.h Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-int.h Sun Apr 27 22:34:17 2008 +0200 @@ -58,6 +58,30 @@ INSTALL_CATOP (octave_ ## TYPE ## _matrix, octave_scalar, TYPE ## _ ## double ## _m_s) \ INSTALL_CATOP (octave_ ## TYPE ## _matrix, octave_matrix, TYPE ## _ ## double ## _m_m) +#define OCTAVE_FLOAT_INT_CONCAT_FN(TYPE) \ + DEFNDCATOP_FN2 (float ## _ ## TYPE ## _s_s, float_scalar, TYPE ## _scalar, TYPE ## NDArray, , float_array, TYPE ## _array, concat) \ + DEFNDCATOP_FN2 (float ## _ ## TYPE ## _s_m, float_scalar, TYPE ## _matrix, TYPE ## NDArray, , float_array, TYPE ## _array, concat) \ + DEFNDCATOP_FN2 (float ## _ ## TYPE ## _m_s, float_matrix, TYPE ## _scalar, TYPE ## NDArray, , float_array, TYPE ## _array, concat) \ + DEFNDCATOP_FN2 (float ## _ ## TYPE ## _m_m, float_matrix, TYPE ## _matrix, TYPE ## NDArray, , float_array, TYPE ## _array, concat) + +#define OCTAVE_INSTALL_FLOAT_INT_CONCAT_FN(TYPE) \ + INSTALL_CATOP (octave_float_scalar, octave_ ## TYPE ## _scalar, float ## _ ## TYPE ## _s_s) \ + INSTALL_CATOP (octave_float_scalar, octave_ ## TYPE ## _matrix, float ## _ ## TYPE ## _s_m) \ + INSTALL_CATOP (octave_float_matrix, octave_ ## TYPE ## _scalar, float ## _ ## TYPE ## _m_s) \ + INSTALL_CATOP (octave_float_matrix, octave_ ## TYPE ## _matrix, float ## _ ## TYPE ## 
_m_m) + +#define OCTAVE_INT_FLOAT_CONCAT_FN(TYPE) \ + DEFNDCATOP_FN2 (TYPE ## _ ## float ## _s_s, TYPE ## _scalar, float_scalar, , TYPE ## NDArray, TYPE ## _array, float_array, concat) \ + DEFNDCATOP_FN2 (TYPE ## _ ## float ## _s_m, TYPE ## _scalar, float_matrix, , TYPE ## NDArray, TYPE ## _array, float_array, concat) \ + DEFNDCATOP_FN2 (TYPE ## _ ## float ## _m_s, TYPE ## _matrix, float_scalar, , TYPE ## NDArray, TYPE ## _array, float_array, concat) \ + DEFNDCATOP_FN2 (TYPE ## _ ## float ## _m_m, TYPE ## _matrix, float_matrix, , TYPE ## NDArray, TYPE ## _array, float_array, concat) + +#define OCTAVE_INSTALL_INT_FLOAT_CONCAT_FN(TYPE) \ + INSTALL_CATOP (octave_ ## TYPE ## _scalar, octave_float_scalar, TYPE ## _ ## float ## _s_s) \ + INSTALL_CATOP (octave_ ## TYPE ## _scalar, octave_float_matrix, TYPE ## _ ## float ## _s_m) \ + INSTALL_CATOP (octave_ ## TYPE ## _matrix, octave_float_scalar, TYPE ## _ ## float ## _m_s) \ + INSTALL_CATOP (octave_ ## TYPE ## _matrix, octave_float_matrix, TYPE ## _ ## float ## _m_m) + // For compatibility, concatenation with a character always returns a // character. 
@@ -198,6 +222,18 @@ xpow (double a, const octave_ ## T1& b) \ { \ return pow (a, b); \ + } \ + \ + octave_value \ + xpow (const octave_ ## T1& a, float b) \ + { \ + return pow (a, b); \ + } \ + \ + octave_value \ + xpow (float a, const octave_ ## T1& b) \ + { \ + return pow (a, b); \ } #define OCTAVE_SS_INT_OPS(TYPE) \ @@ -206,12 +242,18 @@ OCTAVE_SS_INT_ARITH_OPS (ss, TYPE ## _, TYPE ## _) \ OCTAVE_SS_INT_ARITH_OPS (ssx, TYPE ## _, ) \ OCTAVE_SS_INT_ARITH_OPS (sxs, , TYPE ## _) \ + OCTAVE_SS_INT_ARITH_OPS (ssfx, TYPE ## _, float_) \ + OCTAVE_SS_INT_ARITH_OPS (sfxs, float_, TYPE ## _) \ OCTAVE_SS_INT_CMP_OPS (ss, TYPE ## _, TYPE ## _) \ OCTAVE_SS_INT_CMP_OPS (sx, TYPE ## _, ) \ OCTAVE_SS_INT_CMP_OPS (xs, , TYPE ## _) \ + OCTAVE_SS_INT_CMP_OPS (sfx, TYPE ## _, float_) \ + OCTAVE_SS_INT_CMP_OPS (fxs, float_, TYPE ## _) \ OCTAVE_SS_INT_BOOL_OPS (ss, TYPE ## _, TYPE ## _, octave_ ## TYPE (0), octave_ ## TYPE (0)) \ OCTAVE_SS_INT_BOOL_OPS (sx, TYPE ## _, , octave_ ## TYPE (0), 0) \ - OCTAVE_SS_INT_BOOL_OPS (xs, , TYPE ## _, 0, octave_ ## TYPE (0)) + OCTAVE_SS_INT_BOOL_OPS (xs, , TYPE ## _, 0, octave_ ## TYPE (0)) \ + OCTAVE_SS_INT_BOOL_OPS (sfx, TYPE ## _, float_, octave_ ## TYPE (0), 0) \ + OCTAVE_SS_INT_BOOL_OPS (fxs, float_, TYPE ## _, 0, octave_ ## TYPE (0)) #define OCTAVE_SM_INT_ARITH_OPS(PFX, TS, TM) \ /* scalar by matrix ops. 
*/ \ @@ -309,6 +351,30 @@ result (i) = pow (a, b(i)); \ } \ return octave_value (result); \ + } \ +\ + octave_value \ + elem_xpow (const octave_ ## T1& a, const FloatNDArray& b) \ + { \ + T1 ## NDArray result (b.dims ()); \ + for (int i = 0; i < b.length (); i++) \ + { \ + OCTAVE_QUIT; \ + result (i) = pow (a, b(i)); \ + } \ + return octave_value (result); \ + } \ + \ + octave_value \ + elem_xpow (float a, const T2 ## NDArray& b) \ + { \ + T2 ## NDArray result (b.dims ()); \ + for (int i = 0; i < b.length (); i++) \ + { \ + OCTAVE_QUIT; \ + result (i) = pow (a, b(i)); \ + } \ + return octave_value (result); \ } @@ -325,14 +391,21 @@ OCTAVE_SM_INT_ARITH_OPS (sm, TYPE ## _, TYPE ## _) \ OCTAVE_SM_INT_ARITH_OPS (smx, TYPE ## _, ) \ OCTAVE_SM_INT_ARITH_OPS (sxm, , TYPE ## _) \ + OCTAVE_SM_INT_ARITH_OPS (smfx, TYPE ## _, float_) \ + OCTAVE_SM_INT_ARITH_OPS (sfxm, float_, TYPE ## _) \ OCTAVE_SM_INT_CMP_OPS (sm, TYPE ## _, TYPE ## _) \ OCTAVE_SM_INT_CMP_OPS (xm, , TYPE ## _) \ OCTAVE_SM_INT_CMP_OPS (smx, TYPE ## _, ) \ + OCTAVE_SM_INT_CMP_OPS (fxm, float_, TYPE ## _) \ + OCTAVE_SM_INT_CMP_OPS (smfx, TYPE ## _, float_) \ OCTAVE_SM_INT_BOOL_OPS (sm, TYPE ## _, TYPE ## _) \ OCTAVE_SM_INT_BOOL_OPS (xm, , TYPE ## _) \ OCTAVE_SM_INT_BOOL_OPS (smx, TYPE ## _, ) \ + OCTAVE_SM_INT_BOOL_OPS (fxm, float_, TYPE ## _) \ + OCTAVE_SM_INT_BOOL_OPS (smfx, TYPE ## _, float_) \ OCTAVE_SM_CONV (TYPE ## _, TYPE ## _) \ - OCTAVE_SM_CONV (TYPE ## _, complex_) + OCTAVE_SM_CONV (TYPE ## _, complex_) \ + OCTAVE_SM_CONV (TYPE ## _, float_complex_) #define OCTAVE_MS_INT_ARITH_OPS(PFX, TM, TS) \ /* matrix by scalar ops. 
*/ \ @@ -431,6 +504,28 @@ result (i) = pow (a(i), b); \ } \ return octave_value (result); \ +} \ +\ +octave_value elem_xpow (T1 ## NDArray a, float b) \ +{ \ + T1 ## NDArray result (a.dims ()); \ + for (int i = 0; i < a.length (); i++) \ + { \ + OCTAVE_QUIT; \ + result (i) = pow (a(i), b); \ + } \ + return octave_value (result); \ +} \ +\ +octave_value elem_xpow (FloatNDArray a, octave_ ## T2 b) \ +{ \ + T2 ## NDArray result (a.dims ()); \ + for (int i = 0; i < a.length (); i++) \ + { \ + OCTAVE_QUIT; \ + result (i) = pow (a(i), b); \ + } \ + return octave_value (result); \ } @@ -439,14 +534,21 @@ OCTAVE_MS_INT_ARITH_OPS (ms, TYPE ## _, TYPE ## _) \ OCTAVE_MS_INT_ARITH_OPS (msx, TYPE ## _, ) \ OCTAVE_MS_INT_ARITH_OPS (mxs, , TYPE ## _) \ + OCTAVE_MS_INT_ARITH_OPS (msfx, TYPE ## _, float_) \ + OCTAVE_MS_INT_ARITH_OPS (mfxs, float_, TYPE ## _) \ OCTAVE_MS_INT_CMP_OPS (ms, TYPE ## _, TYPE ## _) \ OCTAVE_MS_INT_CMP_OPS (mx, TYPE ## _, ) \ OCTAVE_MS_INT_CMP_OPS (mxs, , TYPE ## _) \ + OCTAVE_MS_INT_CMP_OPS (mfx, TYPE ## _, float_) \ + OCTAVE_MS_INT_CMP_OPS (mfxs, float_, TYPE ## _) \ OCTAVE_MS_INT_BOOL_OPS (ms, TYPE ## _, TYPE ## _) \ OCTAVE_MS_INT_BOOL_OPS (mx, TYPE ## _, ) \ OCTAVE_MS_INT_BOOL_OPS (mxs, , TYPE ## _) \ + OCTAVE_MS_INT_BOOL_OPS (mfx, TYPE ## _, float_) \ + OCTAVE_MS_INT_BOOL_OPS (mfxs, float_, TYPE ## _) \ OCTAVE_MS_INT_ASSIGN_OPS (ms, TYPE ## _, TYPE ## _, TYPE ## _) \ - OCTAVE_MS_INT_ASSIGN_OPS (mx, TYPE ## _, , ) + OCTAVE_MS_INT_ASSIGN_OPS (mx, TYPE ## _, , ) \ + OCTAVE_MS_INT_ASSIGN_OPS (mfx, TYPE ## _, float_, float_) #define OCTAVE_M_INT_UNOPS(TYPE) \ /* matrix unary ops. 
*/ \ @@ -572,6 +674,44 @@ result (i) = pow (a(i), b(i)); \ } \ return octave_value (result); \ + } \ +\ + octave_value \ + elem_xpow (const T1 ## NDArray& a, const FloatNDArray& b) \ + { \ + dim_vector a_dims = a.dims (); \ + dim_vector b_dims = b.dims (); \ + if (a_dims != b_dims) \ + { \ + gripe_nonconformant ("operator .^", a_dims, b_dims); \ + return octave_value (); \ + } \ + T1 ## NDArray result (a_dims); \ + for (int i = 0; i < a.length (); i++) \ + { \ + OCTAVE_QUIT; \ + result (i) = pow (a(i), b(i)); \ + } \ + return octave_value (result); \ + } \ +\ + octave_value \ + elem_xpow (const FloatNDArray& a, const T2 ## NDArray& b) \ + { \ + dim_vector a_dims = a.dims (); \ + dim_vector b_dims = b.dims (); \ + if (a_dims != b_dims) \ + { \ + gripe_nonconformant ("operator .^", a_dims, b_dims); \ + return octave_value (); \ + } \ + T2 ## NDArray result (a_dims); \ + for (int i = 0; i < a.length (); i++) \ + { \ + OCTAVE_QUIT; \ + result (i) = pow (a(i), b(i)); \ + } \ + return octave_value (result); \ } @@ -589,24 +729,40 @@ OCTAVE_MM_INT_ARITH_OPS (mm, TYPE ## _, TYPE ## _) \ OCTAVE_MM_INT_ARITH_OPS (mmx, TYPE ## _, ) \ OCTAVE_MM_INT_ARITH_OPS (mxm, , TYPE ## _) \ + OCTAVE_MM_INT_ARITH_OPS (mmfx, TYPE ## _, float_) \ + OCTAVE_MM_INT_ARITH_OPS (mfxm, float_, TYPE ## _) \ OCTAVE_MM_INT_CMP_OPS (mm, TYPE ## _, TYPE ## _) \ OCTAVE_MM_INT_CMP_OPS (mmx, TYPE ## _, ) \ + OCTAVE_MM_INT_CMP_OPS (mfxm, float_, TYPE ## _) \ + OCTAVE_MM_INT_CMP_OPS (mmfx, TYPE ## _, float_) \ OCTAVE_MM_INT_CMP_OPS (mxm, , TYPE ## _) \ OCTAVE_MM_INT_BOOL_OPS (mm, TYPE ## _, TYPE ## _) \ OCTAVE_MM_INT_BOOL_OPS (mmx, TYPE ## _, ) \ OCTAVE_MM_INT_BOOL_OPS (mxm, , TYPE ## _) \ + OCTAVE_MM_INT_BOOL_OPS (mmfx, TYPE ## _, float_) \ + OCTAVE_MM_INT_BOOL_OPS (mfxm, float_, TYPE ## _) \ OCTAVE_MM_INT_ASSIGN_OPS (mm, TYPE ## _, TYPE ## _, TYPE ## _) \ OCTAVE_MM_INT_ASSIGN_OPS (mmx, TYPE ## _, , ) \ - OCTAVE_MM_CONV(TYPE ## _, complex_) + OCTAVE_MM_INT_ASSIGN_OPS (mmfx, TYPE ## _, float_, float_) \ + 
OCTAVE_MM_CONV(TYPE ## _, complex_) \ + OCTAVE_MM_CONV(TYPE ## _, float_complex_) #define OCTAVE_RE_INT_ASSIGN_OPS(TYPE) \ DEFNDASSIGNOP_FN (TYPE ## ms_assign, matrix, TYPE ## _scalar, array, assign) \ DEFNDASSIGNOP_FN (TYPE ## mm_assign, matrix, TYPE ## _matrix, array, assign) +#define OCTAVE_FLT_RE_INT_ASSIGN_OPS(TYPE) \ + DEFNDASSIGNOP_FN (TYPE ## fms_assign, float_matrix, TYPE ## _scalar, float_array, assign) \ + DEFNDASSIGNOP_FN (TYPE ## fmm_assign, float_matrix, TYPE ## _matrix, float_array, assign) + #define OCTAVE_CX_INT_ASSIGN_OPS(TYPE) \ DEFNDASSIGNOP_FN (TYPE ## cms_assign, complex_matrix, TYPE ## _scalar, complex_array, assign) \ DEFNDASSIGNOP_FN (TYPE ## cmm_assign, complex_matrix, TYPE ## _matrix, complex_array, assign) +#define OCTAVE_FLT_CX_INT_ASSIGN_OPS(TYPE) \ + DEFNDASSIGNOP_FN (TYPE ## fcms_assign, float_complex_matrix, TYPE ## _scalar, float_complex_array, assign) \ + DEFNDASSIGNOP_FN (TYPE ## fcmm_assign, float_complex_matrix, TYPE ## _matrix, float_complex_array, assign) + #define OCTAVE_INT_OPS(TYPE) \ OCTAVE_SS_INT_OPS (TYPE) \ OCTAVE_SM_INT_OPS (TYPE) \ @@ -614,7 +770,9 @@ OCTAVE_MM_INT_OPS (TYPE) \ OCTAVE_CONCAT_FN (TYPE) \ OCTAVE_RE_INT_ASSIGN_OPS (TYPE) \ - OCTAVE_CX_INT_ASSIGN_OPS (TYPE) + OCTAVE_FLT_RE_INT_ASSIGN_OPS (TYPE) \ + OCTAVE_CX_INT_ASSIGN_OPS (TYPE) \ + OCTAVE_FLT_CX_INT_ASSIGN_OPS (TYPE) #define OCTAVE_INSTALL_S_INT_UNOPS(TYPE) \ INSTALL_UNOP (op_not, octave_ ## TYPE ## _scalar, s_not); \ @@ -655,15 +813,23 @@ OCTAVE_INSTALL_SS_INT_ARITH_OPS (ss, TYPE ## _, TYPE ## _) \ OCTAVE_INSTALL_SS_INT_ARITH_OPS (ssx, TYPE ## _, ) \ OCTAVE_INSTALL_SS_INT_ARITH_OPS (sxs, , TYPE ## _) \ + OCTAVE_INSTALL_SS_INT_ARITH_OPS (ssfx, TYPE ## _, float_) \ + OCTAVE_INSTALL_SS_INT_ARITH_OPS (sfxs, float_, TYPE ## _) \ OCTAVE_INSTALL_SS_INT_CMP_OPS (ss, TYPE ## _, TYPE ## _) \ OCTAVE_INSTALL_SS_INT_CMP_OPS (sx, TYPE ## _, ) \ OCTAVE_INSTALL_SS_INT_CMP_OPS (xs, , TYPE ## _) \ + OCTAVE_INSTALL_SS_INT_CMP_OPS (sfx, TYPE ## _, float_) \ + 
OCTAVE_INSTALL_SS_INT_CMP_OPS (fxs, float_, TYPE ## _) \ OCTAVE_INSTALL_SS_INT_BOOL_OPS (ss, TYPE ## _, TYPE ## _) \ OCTAVE_INSTALL_SS_INT_BOOL_OPS (sx, TYPE ## _, ) \ OCTAVE_INSTALL_SS_INT_BOOL_OPS (xs, , TYPE ## _) \ + OCTAVE_INSTALL_SS_INT_BOOL_OPS (sfx, TYPE ## _, float_) \ + OCTAVE_INSTALL_SS_INT_BOOL_OPS (fxs, float_, TYPE ## _) \ INSTALL_ASSIGNCONV (octave_ ## TYPE ## _scalar, octave_ ## TYPE ## _scalar, octave_ ## TYPE ## _matrix) \ INSTALL_ASSIGNCONV (octave_ ## TYPE ## _scalar, octave_scalar, octave_ ## TYPE ## _matrix) \ - INSTALL_ASSIGNCONV (octave_ ## TYPE ## _scalar, octave_complex_scalar, octave_complex_matrix) + INSTALL_ASSIGNCONV (octave_ ## TYPE ## _scalar, octave_float_scalar, octave_ ## TYPE ## _matrix) \ + INSTALL_ASSIGNCONV (octave_ ## TYPE ## _scalar, octave_complex_scalar, octave_complex_matrix) \ + INSTALL_ASSIGNCONV (octave_ ## TYPE ## _scalar, octave_float_complex_scalar, octave_float_complex_matrix) #define OCTAVE_INSTALL_SM_INT_ARITH_OPS(PFX, T1, T2) \ INSTALL_BINOP (op_add, octave_ ## T1 ## scalar, octave_ ## T2 ## matrix, PFX ## _add); \ @@ -693,17 +859,26 @@ OCTAVE_INSTALL_SM_INT_ARITH_OPS (sm, TYPE ## _, TYPE ## _) \ OCTAVE_INSTALL_SM_INT_ARITH_OPS (smx, TYPE ## _, ) \ OCTAVE_INSTALL_SM_INT_ARITH_OPS (sxm, , TYPE ## _) \ + OCTAVE_INSTALL_SM_INT_ARITH_OPS (smfx, TYPE ## _, float_) \ + OCTAVE_INSTALL_SM_INT_ARITH_OPS (sfxm, float_, TYPE ## _) \ OCTAVE_INSTALL_SM_INT_CMP_OPS (sm, TYPE ## _, TYPE ## _) \ OCTAVE_INSTALL_SM_INT_CMP_OPS (xm, , TYPE ## _) \ OCTAVE_INSTALL_SM_INT_CMP_OPS (smx, TYPE ## _, ) \ + OCTAVE_INSTALL_SM_INT_CMP_OPS (fxm, float_, TYPE ## _) \ + OCTAVE_INSTALL_SM_INT_CMP_OPS (smfx, TYPE ## _, float_) \ OCTAVE_INSTALL_SM_INT_BOOL_OPS (sm, TYPE ## _, TYPE ## _) \ OCTAVE_INSTALL_SM_INT_BOOL_OPS (xm, , TYPE ## _) \ OCTAVE_INSTALL_SM_INT_BOOL_OPS (smx, TYPE ## _, ) \ + OCTAVE_INSTALL_SM_INT_BOOL_OPS (fxm, float_, TYPE ## _) \ + OCTAVE_INSTALL_SM_INT_BOOL_OPS (smfx, TYPE ## _, float_) \ INSTALL_WIDENOP (octave_ ## TYPE ## 
_scalar, octave_ ## TYPE ## _matrix, TYPE ## _s_ ## TYPE ## _m_conv) \ INSTALL_WIDENOP (octave_ ## TYPE ## _scalar, octave_complex_matrix, TYPE ## _s_complex_m_conv) \ + INSTALL_WIDENOP (octave_ ## TYPE ## _scalar, octave_float_complex_matrix, TYPE ## _s_float_complex_m_conv) \ INSTALL_ASSIGNCONV (octave_ ## TYPE ## _scalar, octave_ ## TYPE ## _matrix, octave_ ## TYPE ## _matrix) \ INSTALL_ASSIGNCONV (octave_ ## TYPE ## _scalar, octave_matrix, octave_ ## TYPE ## _matrix) \ - INSTALL_ASSIGNCONV (octave_ ## TYPE ## _scalar, octave_complex_matrix, octave_complex_matrix) + INSTALL_ASSIGNCONV (octave_ ## TYPE ## _scalar, octave_float_matrix, octave_ ## TYPE ## _matrix) \ + INSTALL_ASSIGNCONV (octave_ ## TYPE ## _scalar, octave_complex_matrix, octave_complex_matrix) \ + INSTALL_ASSIGNCONV (octave_ ## TYPE ## _scalar, octave_float_complex_matrix, octave_float_complex_matrix) #define OCTAVE_INSTALL_MS_INT_ARITH_OPS(PFX, T1, T2) \ INSTALL_BINOP (op_add, octave_ ## T1 ## matrix, octave_ ## T2 ## scalar, PFX ## _add); \ @@ -737,15 +912,23 @@ OCTAVE_INSTALL_MS_INT_ARITH_OPS (ms, TYPE ## _, TYPE ## _) \ OCTAVE_INSTALL_MS_INT_ARITH_OPS (msx, TYPE ## _, ) \ OCTAVE_INSTALL_MS_INT_ARITH_OPS (mxs, , TYPE ## _) \ + OCTAVE_INSTALL_MS_INT_ARITH_OPS (msfx, TYPE ## _, float_) \ + OCTAVE_INSTALL_MS_INT_ARITH_OPS (mfxs, float_, TYPE ## _) \ OCTAVE_INSTALL_MS_INT_CMP_OPS (ms, TYPE ## _, TYPE ## _) \ OCTAVE_INSTALL_MS_INT_CMP_OPS (mx, TYPE ## _, ) \ OCTAVE_INSTALL_MS_INT_CMP_OPS (mxs, , TYPE ## _) \ + OCTAVE_INSTALL_MS_INT_CMP_OPS (mfx, TYPE ## _, float_) \ + OCTAVE_INSTALL_MS_INT_CMP_OPS (mfxs, float_, TYPE ## _) \ OCTAVE_INSTALL_MS_INT_BOOL_OPS (ms, TYPE ## _, TYPE ## _) \ OCTAVE_INSTALL_MS_INT_BOOL_OPS (mx, TYPE ## _, ) \ OCTAVE_INSTALL_MS_INT_BOOL_OPS (mxs, , TYPE ## _) \ + OCTAVE_INSTALL_MS_INT_BOOL_OPS (mfx, TYPE ## _, float_) \ + OCTAVE_INSTALL_MS_INT_BOOL_OPS (mfxs, float_, TYPE ## _) \ OCTAVE_INSTALL_MS_INT_ASSIGN_OPS (ms, TYPE ## _, TYPE ## _) \ OCTAVE_INSTALL_MS_INT_ASSIGN_OPS 
(mx, TYPE ## _, ) \ - INSTALL_ASSIGNCONV (octave_ ## TYPE ## _matrix, octave_complex_scalar, octave_complex_matrix) + OCTAVE_INSTALL_MS_INT_ASSIGN_OPS (mfx, TYPE ## _, float_) \ + INSTALL_ASSIGNCONV (octave_ ## TYPE ## _matrix, octave_complex_scalar, octave_complex_matrix) \ + INSTALL_ASSIGNCONV (octave_ ## TYPE ## _matrix, octave_float_complex_scalar, octave_float_complex_matrix) #define OCTAVE_INSTALL_M_INT_UNOPS(TYPE) \ INSTALL_UNOP (op_not, octave_ ## TYPE ## _matrix, m_not); \ @@ -789,16 +972,25 @@ OCTAVE_INSTALL_MM_INT_ARITH_OPS (mm, TYPE ##_, TYPE ## _) \ OCTAVE_INSTALL_MM_INT_ARITH_OPS (mmx, TYPE ##_, ) \ OCTAVE_INSTALL_MM_INT_ARITH_OPS (mxm, , TYPE ##_) \ + OCTAVE_INSTALL_MM_INT_ARITH_OPS (mmfx, TYPE ##_, float_) \ + OCTAVE_INSTALL_MM_INT_ARITH_OPS (mfxm, float_, TYPE ##_) \ OCTAVE_INSTALL_MM_INT_CMP_OPS (mm, TYPE ## _, TYPE ## _) \ OCTAVE_INSTALL_MM_INT_CMP_OPS (mmx, TYPE ## _, ) \ OCTAVE_INSTALL_MM_INT_CMP_OPS (mxm, , TYPE ## _) \ + OCTAVE_INSTALL_MM_INT_CMP_OPS (mmfx, TYPE ## _, float_) \ + OCTAVE_INSTALL_MM_INT_CMP_OPS (mfxm, float_, TYPE ## _) \ OCTAVE_INSTALL_MM_INT_BOOL_OPS (mm, TYPE ## _, TYPE ## _) \ OCTAVE_INSTALL_MM_INT_BOOL_OPS (mmx, TYPE ## _, ) \ OCTAVE_INSTALL_MM_INT_BOOL_OPS (mxm, , TYPE ## _) \ + OCTAVE_INSTALL_MM_INT_BOOL_OPS (mmfx, TYPE ## _, float_) \ + OCTAVE_INSTALL_MM_INT_BOOL_OPS (mfxm, float_, TYPE ## _) \ OCTAVE_INSTALL_MM_INT_ASSIGN_OPS (mm, TYPE ## _, TYPE ## _) \ OCTAVE_INSTALL_MM_INT_ASSIGN_OPS (mmx, TYPE ## _, ) \ + OCTAVE_INSTALL_MM_INT_ASSIGN_OPS (mmfx, TYPE ## _, float_) \ INSTALL_WIDENOP (octave_ ## TYPE ## _matrix, octave_complex_matrix, TYPE ## _m_complex_m_conv) \ - INSTALL_ASSIGNCONV (octave_ ## TYPE ## _matrix, octave_complex_matrix, octave_complex_matrix) + INSTALL_WIDENOP (octave_ ## TYPE ## _matrix, octave_float_complex_matrix, TYPE ## _m_float_complex_m_conv) \ + INSTALL_ASSIGNCONV (octave_ ## TYPE ## _matrix, octave_complex_matrix, octave_complex_matrix) \ + INSTALL_ASSIGNCONV (octave_ ## TYPE ## _matrix, 
octave_float_complex_matrix, octave_float_complex_matrix) #define OCTAVE_INSTALL_RE_INT_ASSIGN_OPS(TYPE) \ INSTALL_ASSIGNOP (op_asn_eq, octave_matrix, octave_ ## TYPE ## _scalar, TYPE ## ms_assign) \ @@ -806,12 +998,24 @@ INSTALL_ASSIGNCONV (octave_scalar, octave_ ## TYPE ## _scalar, octave_matrix) \ INSTALL_ASSIGNCONV (octave_matrix, octave_ ## TYPE ## _matrix, octave_matrix) +#define OCTAVE_INSTALL_FLT_RE_INT_ASSIGN_OPS(TYPE) \ + INSTALL_ASSIGNOP (op_asn_eq, octave_float_matrix, octave_ ## TYPE ## _scalar, TYPE ## fms_assign) \ + INSTALL_ASSIGNOP (op_asn_eq, octave_float_matrix, octave_ ## TYPE ## _matrix, TYPE ## fmm_assign) \ + INSTALL_ASSIGNCONV (octave_float_scalar, octave_ ## TYPE ## _scalar, octave_float_matrix) \ + INSTALL_ASSIGNCONV (octave_float_matrix, octave_ ## TYPE ## _matrix, octave_float_matrix) + #define OCTAVE_INSTALL_CX_INT_ASSIGN_OPS(TYPE) \ INSTALL_ASSIGNOP (op_asn_eq, octave_complex_matrix, octave_ ## TYPE ## _scalar, TYPE ## cms_assign) \ INSTALL_ASSIGNOP (op_asn_eq, octave_complex_matrix, octave_ ## TYPE ## _matrix, TYPE ## cmm_assign) \ INSTALL_ASSIGNCONV (octave_complex_scalar, octave_ ## TYPE ## _scalar, octave_complex_matrix) \ INSTALL_ASSIGNCONV (octave_complex_matrix, octave_ ## TYPE ## _matrix, octave_complex_matrix) +#define OCTAVE_INSTALL_FLT_CX_INT_ASSIGN_OPS(TYPE) \ + INSTALL_ASSIGNOP (op_asn_eq, octave_float_complex_matrix, octave_ ## TYPE ## _scalar, TYPE ## fcms_assign) \ + INSTALL_ASSIGNOP (op_asn_eq, octave_float_complex_matrix, octave_ ## TYPE ## _matrix, TYPE ## fcmm_assign) \ + INSTALL_ASSIGNCONV (octave_float_complex_scalar, octave_ ## TYPE ## _scalar, octave_complex_matrix) \ + INSTALL_ASSIGNCONV (octave_float_complex_matrix, octave_ ## TYPE ## _matrix, octave_complex_matrix) + #define OCTAVE_INSTALL_INT_OPS(TYPE) \ OCTAVE_INSTALL_SS_INT_OPS (TYPE) \ OCTAVE_INSTALL_SM_INT_OPS (TYPE) \ @@ -819,7 +1023,9 @@ OCTAVE_INSTALL_MM_INT_OPS (TYPE) \ OCTAVE_INSTALL_CONCAT_FN (TYPE) \ OCTAVE_INSTALL_RE_INT_ASSIGN_OPS (TYPE) \ - 
OCTAVE_INSTALL_CX_INT_ASSIGN_OPS (TYPE) + OCTAVE_INSTALL_FLT_RE_INT_ASSIGN_OPS (TYPE) \ + OCTAVE_INSTALL_CX_INT_ASSIGN_OPS (TYPE) \ + OCTAVE_INSTALL_FLT_CX_INT_ASSIGN_OPS (TYPE) #define OCTAVE_INSTALL_SM_INT_ASSIGNCONV(TLHS, TRHS) \ INSTALL_ASSIGNCONV (octave_ ## TLHS ## _scalar, octave_ ## TRHS ## _scalar, octave_ ## TLHS ## _matrix) \ diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-m-cm.cc --- a/src/OPERATORS/op-m-cm.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-m-cm.cc Sun Apr 27 22:34:17 2008 +0200 @@ -34,7 +34,9 @@ #include "oct-obj.h" #include "ov.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" @@ -132,6 +134,7 @@ INSTALL_CATOP (octave_matrix, octave_complex_matrix, m_cm); INSTALL_ASSIGNCONV (octave_matrix, octave_complex_matrix, octave_complex_matrix); + INSTALL_ASSIGNCONV (octave_float_matrix, octave_complex_matrix, octave_float_complex_matrix); INSTALL_WIDENOP (octave_matrix, octave_complex_matrix, complex_matrix_conv); } diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-m-cs.cc --- a/src/OPERATORS/op-m-cs.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-m-cs.cc Sun Apr 27 22:34:17 2008 +0200 @@ -34,7 +34,9 @@ #include "oct-obj.h" #include "ov.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-complex.h" #include "ov-typeinfo.h" #include "ops.h" @@ -135,6 +137,7 @@ INSTALL_CATOP (octave_matrix, octave_complex, m_cs); INSTALL_ASSIGNCONV (octave_matrix, octave_complex, octave_complex_matrix); + INSTALL_ASSIGNCONV (octave_float_matrix, octave_complex, octave_float_complex_matrix); } /* diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-m-m.cc --- a/src/OPERATORS/op-m-m.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-m-m.cc Sun Apr 27 22:34:17 2008 +0200 @@ -29,6 +29,7 @@ #include "oct-obj.h" #include "ov.h" #include "ov-re-mat.h" 
+#include "ov-flt-re-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" @@ -115,6 +116,14 @@ DEFNDCATOP_FN (m_m, matrix, matrix, array, array, concat) DEFNDASSIGNOP_FN (assign, matrix, matrix, array, assign) +DEFNDASSIGNOP_FN (sgl_assign, float_matrix, matrix, float_array, assign) + +CONVDECL (matrix_to_float_matrix) +{ + CAST_CONV_ARG (const octave_matrix&); + + return new octave_float_matrix (FloatNDArray (v.array_value ())); +} void install_m_m_ops (void) @@ -150,6 +159,9 @@ INSTALL_CATOP (octave_matrix, octave_matrix, m_m); INSTALL_ASSIGNOP (op_asn_eq, octave_matrix, octave_matrix, assign); + INSTALL_ASSIGNOP (op_asn_eq, octave_float_matrix, octave_matrix, sgl_assign); + + INSTALL_CONVOP (octave_matrix, octave_float_matrix, matrix_to_float_matrix); } /* diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-m-s.cc --- a/src/OPERATORS/op-m-s.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-m-s.cc Sun Apr 27 22:34:17 2008 +0200 @@ -29,6 +29,7 @@ #include "oct-obj.h" #include "ov.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-scalar.h" #include "ov-typeinfo.h" #include "ops.h" @@ -105,6 +106,7 @@ DEFNDCATOP_FN (m_s, matrix, scalar, array, array, concat) DEFNDASSIGNOP_FN (assign, matrix, scalar, array, assign) +DEFNDASSIGNOP_FN (sgl_assign, float_matrix, scalar, float_array, assign) void install_m_s_ops (void) @@ -137,6 +139,7 @@ INSTALL_CATOP (octave_matrix, octave_scalar, m_s); INSTALL_ASSIGNOP (op_asn_eq, octave_matrix, octave_scalar, assign); + INSTALL_ASSIGNOP (op_asn_eq, octave_float_matrix, octave_scalar, sgl_assign); } /* diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-range.cc --- a/src/OPERATORS/op-range.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-range.cc Sun Apr 27 22:34:17 2008 +0200 @@ -32,6 +32,7 @@ #include "ov-ch-mat.h" #include "ov-scalar.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-complex.h" #include "ov-cx-mat.h" #include "ov-bool.h" @@ -74,6 +75,13 @@ 
DEFNDCATOP_FN (bm_r, bool_matrix, range, array, array, concat) DEFNDCATOP_FN (chm_r, char_matrix, range, char_array, array, concat) +CONVDECL (range_to_float_matrix) +{ + CAST_CONV_ARG (const octave_range&); + + return new octave_float_matrix (FloatNDArray (v.array_value ())); +} + void install_range_ops (void) { @@ -98,6 +106,8 @@ INSTALL_CATOP (octave_bool, octave_range, b_r); INSTALL_CATOP (octave_bool_matrix, octave_range, bm_r); INSTALL_CATOP (octave_char_matrix, octave_range, chm_r); + + INSTALL_CONVOP (octave_range, octave_float_matrix, range_to_float_matrix); } /* diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-s-cm.cc --- a/src/OPERATORS/op-s-cm.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-s-cm.cc Sun Apr 27 22:34:17 2008 +0200 @@ -34,7 +34,9 @@ #include "oct-obj.h" #include "ov.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-re-mat.h" #include "ov-typeinfo.h" #include "ops.h" @@ -135,6 +137,7 @@ INSTALL_CATOP (octave_scalar, octave_complex_matrix, s_cm); INSTALL_ASSIGNCONV (octave_scalar, octave_complex_matrix, octave_complex_matrix); + INSTALL_ASSIGNCONV (octave_float_scalar, octave_complex_matrix, octave_float_complex_matrix); INSTALL_WIDENOP (octave_scalar, octave_complex_matrix, complex_matrix_conv); } diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-s-cs.cc --- a/src/OPERATORS/op-s-cs.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-s-cs.cc Sun Apr 27 22:34:17 2008 +0200 @@ -29,8 +29,10 @@ #include "oct-obj.h" #include "ov.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-complex.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" @@ -179,6 +181,7 @@ INSTALL_CATOP (octave_scalar, octave_complex, s_cs); INSTALL_ASSIGNCONV (octave_scalar, octave_complex, octave_complex_matrix); + INSTALL_ASSIGNCONV (octave_float_scalar, octave_complex, octave_float_complex_matrix); } /* diff -r 
45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-s-m.cc --- a/src/OPERATORS/op-s-m.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-s-m.cc Sun Apr 27 22:34:17 2008 +0200 @@ -29,7 +29,9 @@ #include "oct-obj.h" #include "ov.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" @@ -129,6 +131,7 @@ INSTALL_CATOP (octave_scalar, octave_matrix, s_m); INSTALL_ASSIGNCONV (octave_scalar, octave_matrix, octave_matrix); + INSTALL_ASSIGNCONV (octave_float_scalar, octave_matrix, octave_float_matrix); INSTALL_WIDENOP (octave_scalar, octave_matrix, matrix_conv); } diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-s-s.cc --- a/src/OPERATORS/op-s-s.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-s-s.cc Sun Apr 27 22:34:17 2008 +0200 @@ -29,7 +29,9 @@ #include "oct-obj.h" #include "ov.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" @@ -118,6 +120,13 @@ DEFNDCATOP_FN (s_s, scalar, scalar, array, array, concat) +CONVDECL (scalar_to_float) +{ + CAST_CONV_ARG (const octave_scalar&); + + return new octave_float_matrix (FloatMatrix (1, 1, static_cast<float> (v.double_value ()))); +} + void install_s_s_ops (void) { @@ -152,6 +161,9 @@ INSTALL_CATOP (octave_scalar, octave_scalar, s_s); INSTALL_ASSIGNCONV (octave_scalar, octave_scalar, octave_matrix); + INSTALL_ASSIGNCONV (octave_float_scalar, octave_scalar, octave_float_matrix); + + INSTALL_CONVOP (octave_scalar, octave_float_matrix, scalar_to_float); } /* diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-ui16-ui16.cc --- a/src/OPERATORS/op-ui16-ui16.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-ui16-ui16.cc Sun Apr 27 22:34:17 2008 +0200 @@ -69,9 +69,13 @@ #include "ov-uint64.h" #include "ov-uint8.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-complex.h" +#include "ov-flt-complex.h" 
#include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-ui32-ui32.cc --- a/src/OPERATORS/op-ui32-ui32.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-ui32-ui32.cc Sun Apr 27 22:34:17 2008 +0200 @@ -69,9 +69,13 @@ #include "ov-uint64.h" #include "ov-uint8.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-complex.h" +#include "ov-flt-complex.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-ui64-ui64.cc --- a/src/OPERATORS/op-ui64-ui64.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-ui64-ui64.cc Sun Apr 27 22:34:17 2008 +0200 @@ -69,9 +69,13 @@ #include "ov-uint64.h" #include "ov-uint8.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-complex.h" +#include "ov-flt-complex.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" diff -r 45f5faba05a2 -r 82be108cc558 src/OPERATORS/op-ui8-ui8.cc --- a/src/OPERATORS/op-ui8-ui8.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/OPERATORS/op-ui8-ui8.cc Sun Apr 27 22:34:17 2008 +0200 @@ -69,9 +69,13 @@ #include "ov-uint64.h" #include "ov-uint8.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-complex.h" +#include "ov-flt-complex.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-typeinfo.h" #include "ops.h" #include "xdiv.h" diff -r 45f5faba05a2 -r 82be108cc558 src/bitfcns.cc --- a/src/bitfcns.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/bitfcns.cc Sun Apr 27 22:34:17 2008 +0200 @@ -309,6 +309,21 @@ return static_cast (a) & mask; } +static int64_t +bitshift (float a, int n, int64_t 
mask) +{ + // In the name of bug-for-bug compatibility. + if (a < 0) + return -bitshift (-a, n, mask); + + if (n > 0) + return (static_cast (a) << n) & mask; + else if (n < 0) + return (static_cast (a) >> -n) & mask; + else + return static_cast (a) & mask; +} + // Note that the bitshift operators are undefined if shifted by more // bits than in the type, so we need to test for the size of the // shift. diff -r 45f5faba05a2 -r 82be108cc558 src/data.cc --- a/src/data.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/data.cc Sun Apr 27 22:34:17 2008 +0200 @@ -51,8 +51,12 @@ #include "oct-map.h" #include "oct-obj.h" #include "ov.h" +#include "ov-float.h" #include "ov-complex.h" +#include "ov-flt-complex.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" +#include "ov-cx-sparse.h" #include "parse.h" #include "pt-mat.h" #include "utils.h" @@ -129,6 +133,7 @@ // These mapping functions may also be useful in other places, eh? typedef double (*d_dd_fcn) (double, double); +typedef float (*f_ff_fcn) (float, float); static NDArray map_d_m (d_dd_fcn f, double x, const NDArray& y) @@ -149,6 +154,25 @@ return retval; } +static FloatNDArray +map_f_fm (f_ff_fcn f, float x, const FloatNDArray& y) +{ + FloatNDArray retval (y.dims ()); + float *r_data = retval.fortran_vec (); + + const float *y_data = y.data (); + + octave_idx_type nel = y.numel (); + + for (octave_idx_type i = 0; i < nel; i++) + { + OCTAVE_QUIT; + r_data[i] = f (x, y_data[i]); + } + + return retval; +} + static NDArray map_m_d (d_dd_fcn f, const NDArray& x, double y) { @@ -168,6 +192,25 @@ return retval; } +static FloatNDArray +map_fm_f (f_ff_fcn f, const FloatNDArray& x, float y) +{ + FloatNDArray retval (x.dims ()); + float *r_data = retval.fortran_vec (); + + const float *x_data = x.data (); + + octave_idx_type nel = x.numel (); + + for (octave_idx_type i = 0; i < nel; i++) + { + OCTAVE_QUIT; + r_data[i] = f (x_data[i], y); + } + + return retval; +} + static NDArray map_m_m (d_dd_fcn f, const NDArray& x, const 
NDArray& y) { @@ -190,6 +233,28 @@ return retval; } +static FloatNDArray +map_fm_fm (f_ff_fcn f, const FloatNDArray& x, const FloatNDArray& y) +{ + assert (x.dims () == y.dims ()); + + FloatNDArray retval (x.dims ()); + float *r_data = retval.fortran_vec (); + + const float *x_data = x.data (); + const float *y_data = y.data (); + + octave_idx_type nel = x.numel (); + + for (octave_idx_type i = 0; i < nel; i++) + { + OCTAVE_QUIT; + r_data[i] = f (x_data[i], y_data[i]); + } + + return retval; +} + static SparseMatrix map_d_s (d_dd_fcn f, double x, const SparseMatrix& y) { @@ -438,29 +503,62 @@ bool y_is_scalar = y_dims.all_ones (); bool x_is_scalar = x_dims.all_ones (); + bool is_float = arg_y.is_single_type () || arg_x.is_single_type (); + if (y_is_scalar && x_is_scalar) { - double y = arg_y.double_value (); - - if (! error_state) + if (is_float) { - double x = arg_x.double_value (); + float y = arg_y.float_value (); if (! error_state) - retval = atan2 (y, x); + { + float x = arg_x.float_value (); + + if (! error_state) + retval = atan2f (y, x); + } + } + else + { + double y = arg_y.double_value (); + + if (! error_state) + { + double x = arg_x.double_value (); + + if (! error_state) + retval = atan2 (y, x); + } } } else if (y_is_scalar) { - double y = arg_y.double_value (); - - if (! error_state) + if (is_float) { - // Even if x is sparse return a full matrix here - NDArray x = arg_x.array_value (); + float y = arg_y.float_value (); if (! error_state) - retval = map_d_m (atan2, y, x); + { + // Even if x is sparse return a full matrix here + FloatNDArray x = arg_x.float_array_value (); + + if (! error_state) + retval = map_f_fm (atan2f, y, x); + } + } + else + { + double y = arg_y.double_value (); + + if (! error_state) + { + // Even if x is sparse return a full matrix here + NDArray x = arg_x.array_value (); + + if (! 
error_state) + retval = map_d_m (atan2, y, x); + } } } else if (x_is_scalar) @@ -477,6 +575,18 @@ retval = map_s_d (atan2, y, x); } } + else if (is_float) + { + FloatNDArray y = arg_y.float_array_value (); + + if (! error_state) + { + float x = arg_x.float_value (); + + if (! error_state) + retval = map_fm_f (atan2f, y, x); + } + } else { NDArray y = arg_y.array_value (); @@ -505,6 +615,18 @@ retval = map_s_s (atan2, y, x); } } + else if (is_float) + { + FloatNDArray y = arg_y.array_value (); + + if (! error_state) + { + FloatNDArray x = arg_x.array_value (); + + if (! error_state) + retval = map_fm_fm (atan2f, y, x); + } + } else { NDArray y = arg_y.array_value (); @@ -564,64 +686,135 @@ bool x_is_scalar = x_dims.all_ones (); bool y_is_scalar = y_dims.all_ones (); + bool is_float = arg_y.is_single_type () || arg_x.is_single_type (); + if (y_is_scalar && x_is_scalar) { - double x; - if (arg_x.is_complex_type ()) - x = abs (arg_x.complex_value ()); - else - x = arg_x.double_value (); - - if (! error_state) + if (is_float) { - double y; - if (arg_y.is_complex_type ()) - y = abs (arg_y.complex_value ()); + float x; + if (arg_x.is_complex_type ()) + x = abs (arg_x.float_complex_value ()); else - y = arg_y.double_value (); + x = arg_x.float_value (); if (! error_state) - retval = hypot (x, y); + { + float y; + if (arg_y.is_complex_type ()) + y = abs (arg_y.float_complex_value ()); + else + y = arg_y.float_value (); + + if (! error_state) + retval = hypotf (x, y); + } + } + else + { + double x; + if (arg_x.is_complex_type ()) + x = abs (arg_x.complex_value ()); + else + x = arg_x.double_value (); + + if (! error_state) + { + double y; + if (arg_y.is_complex_type ()) + y = abs (arg_y.complex_value ()); + else + y = arg_y.double_value (); + + if (! error_state) + retval = hypot (x, y); + } } } else if (y_is_scalar) { - NDArray x; - if (arg_x.is_complex_type ()) - x = arg_x.complex_array_value ().abs (); - else - x = arg_x.array_value (); - - if (! 
error_state) + if (is_float) { - double y; - if (arg_y.is_complex_type ()) - y = abs (arg_y.complex_value ()); + FloatNDArray x; + if (arg_x.is_complex_type ()) + x = arg_x.float_complex_array_value ().abs (); else - y = arg_y.double_value (); + x = arg_x.float_array_value (); if (! error_state) - retval = map_m_d (hypot, x, y); + { + float y; + if (arg_y.is_complex_type ()) + y = abs (arg_y.float_complex_value ()); + else + y = arg_y.float_value (); + + if (! error_state) + retval = map_fm_f (hypotf, x, y); + } + } + else + { + NDArray x; + if (arg_x.is_complex_type ()) + x = arg_x.complex_array_value ().abs (); + else + x = arg_x.array_value (); + + if (! error_state) + { + double y; + if (arg_y.is_complex_type ()) + y = abs (arg_y.complex_value ()); + else + y = arg_y.double_value (); + + if (! error_state) + retval = map_m_d (hypot, x, y); + } } } else if (x_is_scalar) { - double x; - if (arg_x.is_complex_type ()) - x = abs (arg_x.complex_value ()); - else - x = arg_x.double_value (); - - if (! error_state) + if (is_float) { - NDArray y; - if (arg_y.is_complex_type ()) - y = arg_y.complex_array_value ().abs (); + float x; + if (arg_x.is_complex_type ()) + x = abs (arg_x.float_complex_value ()); else - y = arg_y.array_value (); + x = arg_x.float_value (); if (! error_state) - retval = map_d_m (hypot, x, y); + { + FloatNDArray y; + if (arg_y.is_complex_type ()) + y = arg_y.float_complex_array_value ().abs (); + else + y = arg_y.float_array_value (); + + if (! error_state) + retval = map_f_fm (hypotf, x, y); + } + } + else + { + double x; + if (arg_x.is_complex_type ()) + x = abs (arg_x.complex_value ()); + else + x = arg_x.double_value (); + + if (! error_state) + { + NDArray y; + if (arg_y.is_complex_type ()) + y = arg_y.complex_array_value ().abs (); + else + y = arg_y.array_value (); + + if (! 
error_state) + retval = map_d_m (hypot, x, y); + } } } else if (y_dims == x_dims) @@ -646,6 +839,26 @@ retval = map_s_s (hypot, x, y); } } + else if (is_float) + { + FloatNDArray x; + if (arg_x.is_complex_type ()) + x = arg_x.float_complex_array_value ().abs (); + else + x = arg_x.float_array_value (); + + if (! error_state) + { + FloatNDArray y; + if (arg_y.is_complex_type ()) + y = arg_y.float_complex_array_value ().abs (); + else + y = arg_y.float_array_value (); + + if (! error_state) + retval = map_fm_fm (hypotf, x, y); + } + } else { NDArray x; @@ -684,6 +897,7 @@ %!assert (size (hypot (1, rand (2, 3, 4))), [2, 3, 4]) %!assert (size (hypot (1, 2)), [1, 1]) %!assert (hypot (1:10, 1:10), sqrt(2) * [1:10], 16*eps) +%!assert (hypot (single(1:10), single(1:10)), single(sqrt(2) * [1:10])); */ template @@ -717,6 +931,29 @@ { if (nargout < 2) retval(0) = args(0).log2 (); + else if (args(0).is_single_type ()) + { + if (args(0).is_real_type ()) + { + FloatNDArray f; + FloatNDArray x = args(0).float_array_value (); + // FIXME -- should E be an int value? + FloatMatrix e; + map_2_xlog2 (x, f, e); + retval (1) = e; + retval (0) = f; + } + else if (args(0).is_complex_type ()) + { + FloatComplexNDArray f; + FloatComplexNDArray x = args(0).float_complex_array_value (); + // FIXME -- should E be an int value? + FloatNDArray e; + map_2_xlog2 (x, f, e); + retval (1) = e; + retval (0) = f; + } + } else if (args(0).is_real_type ()) { NDArray f; @@ -787,37 +1024,69 @@ bool y_is_scalar = y_dims.all_ones (); bool x_is_scalar = x_dims.all_ones (); + bool is_float = arg_y.is_single_type () || arg_x.is_single_type (); + if (y_is_scalar && x_is_scalar) { - double y = arg_y.double_value (); - - if (! error_state) + if (is_float) { - double x = arg_x.double_value (); + float y = arg_y.float_value (); if (! error_state) - retval = fmod (x, y); + { + float x = arg_x.float_value (); + + if (! 
error_state) + retval = fmod (x, y); + } + } + else + { + double y = arg_y.double_value (); + + if (! error_state) + { + double x = arg_x.double_value (); + + if (! error_state) + retval = fmod (x, y); + } } } else if (y_is_scalar) { - double y = arg_y.double_value (); - - if (! error_state) + if (is_float) { - if (arg_x.is_sparse_type ()) + float y = arg_y.float_value (); + + if (! error_state) { - SparseMatrix x = arg_x.sparse_matrix_value (); + FloatNDArray x = arg_x.float_array_value (); if (! error_state) - retval = map_s_d (fmod, x, y); + retval = map_fm_f (fmodf, x, y); } - else + } + else + { + double y = arg_y.double_value (); + + if (! error_state) { - NDArray x = arg_x.array_value (); - - if (! error_state) - retval = map_m_d (fmod, x, y); + if (arg_x.is_sparse_type ()) + { + SparseMatrix x = arg_x.sparse_matrix_value (); + + if (! error_state) + retval = map_s_d (fmod, x, y); + } + else + { + NDArray x = arg_x.array_value (); + + if (! error_state) + retval = map_m_d (fmod, x, y); + } } } } @@ -835,6 +1104,18 @@ retval = map_d_s (fmod, x, y); } } + else if (is_float) + { + FloatNDArray y = arg_y.float_array_value (); + + if (! error_state) + { + float x = arg_x.float_value (); + + if (! error_state) + retval = map_f_fm (fmodf, x, y); + } + } else { NDArray y = arg_y.array_value (); @@ -862,6 +1143,18 @@ retval = map_s_s (fmod, x, y); } } + else if (is_float) + { + FloatNDArray y = arg_y.float_array_value (); + + if (! error_state) + { + FloatNDArray x = arg_x.float_array_value (); + + if (! 
error_state) + retval = map_fm_fm (fmodf, x, y); + } + } else { NDArray y = arg_y.array_value (); @@ -892,6 +1185,8 @@ %!assert (size (fmod (1, 2)), [1, 1]) */ +// FIXME Need to convert the reduction functions of this file for single precision + #define NATIVE_REDUCTION_1(FCN, TYPE, DIM) \ (arg.is_ ## TYPE ## _type ()) \ { \ @@ -967,6 +1262,24 @@ { \ error (#FCN, ": invalid char type"); \ } \ + else if (arg.is_single_type ()) \ + { \ + if (arg.is_complex_type ()) \ + { \ + FloatComplexNDArray tmp = \ + arg.float_complex_array_value (); \ + \ + if (! error_state) \ + retval = tmp.FCN (dim); \ + } \ + else if (arg.is_real_type ()) \ + { \ + FloatNDArray tmp = arg.float_array_value (); \ + \ + if (! error_state) \ + retval = tmp.FCN (dim); \ + } \ + } \ else if (arg.is_complex_type ()) \ { \ ComplexNDArray tmp = arg.complex_array_value (); \ @@ -987,6 +1300,24 @@ return retval; \ } \ } \ + else if (arg.is_single_type ()) \ + { \ + if (arg.is_real_type ()) \ + { \ + FloatNDArray tmp = arg.float_array_value (); \ + \ + if (! error_state) \ + retval = tmp.FCN (dim); \ + } \ + else if (arg.is_complex_type ()) \ + { \ + FloatComplexNDArray tmp = \ + arg.float_complex_array_value (); \ + \ + if (! error_state) \ + retval = tmp.FCN (dim); \ + } \ + } \ else if (arg.is_real_type ()) \ { \ NDArray tmp = arg.array_value (); \ @@ -1043,6 +1374,13 @@ if (! error_state) \ retval = tmp.FCN (dim); \ } \ + else if (arg.is_single_type ()) \ + { \ + FloatNDArray tmp = arg.float_array_value (); \ + \ + if (! error_state) \ + retval = tmp.FCN (dim); \ + } \ else \ { \ NDArray tmp = arg.array_value (); \ @@ -1060,6 +1398,13 @@ if (! error_state) \ retval = tmp.FCN (dim); \ } \ + else if (arg.is_single_type ()) \ + { \ + FloatComplexNDArray tmp = arg.float_complex_array_value (); \ + \ + if (! 
error_state) \ + retval = tmp.FCN (dim); \ + } \ else \ { \ ComplexNDArray tmp = arg.complex_array_value (); \ @@ -1850,19 +2195,46 @@ retval = arg; else { - if (arg.numel () == 1) + if (arg.is_sparse_type ()) { - Complex val = arg.complex_value (); + SparseComplexMatrix val = arg.sparse_complex_matrix_value (); if (! error_state) - retval = octave_value (new octave_complex (val)); + retval = octave_value (new octave_sparse_complex_matrix (val)); + } + else if (arg.is_single_type ()) + { + if (arg.numel () == 1) + { + FloatComplex val = arg.float_complex_value (); + + if (! error_state) + retval = octave_value (new octave_float_complex (val)); + } + else + { + FloatComplexNDArray val = arg.float_complex_array_value (); + + if (! error_state) + retval = octave_value (new octave_float_complex_matrix (val)); + } } else { - ComplexNDArray val = arg.complex_array_value (); - - if (! error_state) - retval = octave_value (new octave_complex_matrix (val)); + if (arg.numel () == 1) + { + Complex val = arg.complex_value (); + + if (! error_state) + retval = octave_value (new octave_complex (val)); + } + else + { + ComplexNDArray val = arg.complex_array_value (); + + if (! 
error_state) + retval = octave_value (new octave_complex_matrix (val)); + } } if (error_state) @@ -1874,7 +2246,140 @@ octave_value re = args(0); octave_value im = args(1); - if (re.numel () == 1) + if (re.is_sparse_type () && im.is_sparse_type ()) + { + const SparseMatrix re_val = re.sparse_matrix_value (); + const SparseMatrix im_val = im.sparse_matrix_value (); + + if (!error_state) + { + if (re.numel () == 1) + { + SparseComplexMatrix result; + if (re_val.nnz () == 0) + result = Complex(0, 1) * SparseComplexMatrix (im_val); + else + { + result = SparseComplexMatrix (im_val.dims (), re_val (0)); + octave_idx_type nr = im_val.rows (); + octave_idx_type nc = im_val.cols (); + + for (octave_idx_type j = 0; j < nc; j++) + { + octave_idx_type off = j * nr; + for (octave_idx_type i = im_val.cidx(j); + i < im_val.cidx(j + 1); i++) + result.data (im_val.ridx(i) + off) = + result.data (im_val.ridx(i) + off) + + Complex (0, im_val.data (i)); + } + } + retval = octave_value (new octave_sparse_complex_matrix (result)); + } + else if (im.numel () == 1) + { + SparseComplexMatrix result; + if (im_val.nnz () == 0) + result = SparseComplexMatrix (re_val); + else + { + result = SparseComplexMatrix (re_val.rows(), re_val.cols(), Complex(0, im_val (0))); + octave_idx_type nr = re_val.rows (); + octave_idx_type nc = re_val.cols (); + + for (octave_idx_type j = 0; j < nc; j++) + { + octave_idx_type off = j * nr; + for (octave_idx_type i = re_val.cidx(j); + i < re_val.cidx(j + 1); i++) + result.data (re_val.ridx(i) + off) = + result.data (re_val.ridx(i) + off) + + re_val.data (i); + } + } + retval = octave_value (new octave_sparse_complex_matrix (result)); + } + else + { + if (re_val.dims () == im_val.dims ()) + { + SparseComplexMatrix result = SparseComplexMatrix(re_val) + + Complex(0, 1) * SparseComplexMatrix (im_val); + retval = octave_value (new octave_sparse_complex_matrix (result)); + } + else + error ("complex: dimension mismatch"); + } + } + } + else if (re.is_single_type () 
|| im.is_single_type ()) + { + if (re.numel () == 1) + { + float re_val = re.float_value (); + + if (im.numel () == 1) + { + float im_val = im.double_value (); + + if (! error_state) + retval = octave_value (new octave_float_complex (FloatComplex (re_val, im_val))); + } + else + { + const FloatNDArray im_val = im.float_array_value (); + + if (! error_state) + { + FloatComplexNDArray result (im_val.dims (), FloatComplex ()); + + for (octave_idx_type i = 0; i < im_val.numel (); i++) + result.xelem (i) = FloatComplex (re_val, im_val(i)); + + retval = octave_value (new octave_float_complex_matrix (result)); + } + } + } + else + { + const FloatNDArray re_val = re.float_array_value (); + + if (im.numel () == 1) + { + float im_val = im.float_value (); + + if (! error_state) + { + FloatComplexNDArray result (re_val.dims (), FloatComplex ()); + + for (octave_idx_type i = 0; i < re_val.numel (); i++) + result.xelem (i) = FloatComplex (re_val(i), im_val); + + retval = octave_value (new octave_float_complex_matrix (result)); + } + } + else + { + const FloatNDArray im_val = im.float_array_value (); + + if (! 
error_state) + { + if (re_val.dims () == im_val.dims ()) + { + FloatComplexNDArray result (re_val.dims (), FloatComplex ()); + + for (octave_idx_type i = 0; i < re_val.numel (); i++) + result.xelem (i) = FloatComplex (re_val(i), im_val(i)); + + retval = octave_value (new octave_float_complex_matrix (result)); + } + else + error ("complex: dimension mismatch"); + } + } + } + } + else if (re.numel () == 1) { double re_val = re.double_value (); @@ -2134,7 +2639,10 @@ retval = uint64NDArray (dims, val); break; - case oct_data_conv::dt_single: // FIXME + case oct_data_conv::dt_single: + retval = FloatNDArray (dims, val); + break; + case oct_data_conv::dt_double: retval = NDArray (dims, val); break; @@ -2215,7 +2723,10 @@ { switch (dt) { - case oct_data_conv::dt_single: // FIXME + case oct_data_conv::dt_single: + retval = FloatNDArray (dims, val); + break; + case oct_data_conv::dt_double: retval = NDArray (dims, val); break; @@ -2293,7 +2804,10 @@ { switch (dt) { - case oct_data_conv::dt_single: // FIXME + case oct_data_conv::dt_single: + retval = FloatComplexNDArray (dims, static_cast (val)); + break; + case oct_data_conv::dt_double: retval = ComplexNDArray (dims, val); break; @@ -2692,6 +3206,7 @@ INSTANTIATE_EYE (uint32NDArray); INSTANTIATE_EYE (int64NDArray); INSTANTIATE_EYE (uint64NDArray); +INSTANTIATE_EYE (FloatNDArray); INSTANTIATE_EYE (NDArray); INSTANTIATE_EYE (boolNDArray); @@ -2740,7 +3255,10 @@ retval = identity_matrix (nr, nc); break; - case oct_data_conv::dt_single: // FIXME + case oct_data_conv::dt_single: + retval = identity_matrix (nr, nc); + break; + case oct_data_conv::dt_double: retval = identity_matrix (nr, nc); break; @@ -2894,30 +3412,62 @@ octave_value arg_1 = args(0); octave_value arg_2 = args(1); - if (arg_1.is_complex_type () || arg_2.is_complex_type ()) + if (arg_1.is_single_type () || arg_2.is_single_type ()) { - Complex x1 = arg_1.complex_value (); - Complex x2 = arg_2.complex_value (); - - if (! 
error_state) + if (arg_1.is_complex_type () || arg_2.is_complex_type ()) { - ComplexRowVector rv = linspace (x1, x2, npoints); + FloatComplex x1 = arg_1.float_complex_value (); + FloatComplex x2 = arg_2.float_complex_value (); if (! error_state) - retval = rv; + { + FloatComplexRowVector rv = linspace (x1, x2, npoints); + + if (! error_state) + retval = rv; + } + } + else + { + float x1 = arg_1.float_value (); + float x2 = arg_2.float_value (); + + if (! error_state) + { + FloatRowVector rv = linspace (x1, x2, npoints); + + if (! error_state) + retval = rv; + } } } else { - double x1 = arg_1.double_value (); - double x2 = arg_2.double_value (); - - if (! error_state) + if (arg_1.is_complex_type () || arg_2.is_complex_type ()) { - RowVector rv = linspace (x1, x2, npoints); + Complex x1 = arg_1.complex_value (); + Complex x2 = arg_2.complex_value (); if (! error_state) - retval = rv; + { + ComplexRowVector rv = linspace (x1, x2, npoints); + + if (! error_state) + retval = rv; + } + } + else + { + double x1 = arg_1.double_value (); + double x2 = arg_2.double_value (); + + if (! error_state) + { + RowVector rv = linspace (x1, x2, npoints); + + if (! error_state) + retval = rv; + } } } } diff -r 45f5faba05a2 -r 82be108cc558 src/oct-stream.cc --- a/src/oct-stream.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/oct-stream.cc Sun Apr 27 22:34:17 2008 +0200 @@ -3219,7 +3219,7 @@ INSTANTIATE_DO_READ (uint32NDArray); INSTANTIATE_DO_READ (int64NDArray); INSTANTIATE_DO_READ (uint64NDArray); -// INSTANTIATE_DO_READ (floatNDArray); +INSTANTIATE_DO_READ (FloatNDArray); INSTANTIATE_DO_READ (NDArray); INSTANTIATE_DO_READ (charNDArray); INSTANTIATE_DO_READ (boolNDArray); @@ -3270,10 +3270,7 @@ FILL_TABLE_ROW (oct_data_conv::dt_uint32, uint32NDArray); FILL_TABLE_ROW (oct_data_conv::dt_int64, int64NDArray); FILL_TABLE_ROW (oct_data_conv::dt_uint64, uint64NDArray); - // FIXME -- the following line allows things like int8=>single - // to work, but they will actually return a double value. 
We - // need a floatNDArray for this to work properly. - FILL_TABLE_ROW (oct_data_conv::dt_single, NDArray); + FILL_TABLE_ROW (oct_data_conv::dt_single, FloatNDArray); FILL_TABLE_ROW (oct_data_conv::dt_double, NDArray); FILL_TABLE_ROW (oct_data_conv::dt_char, charNDArray); FILL_TABLE_ROW (oct_data_conv::dt_schar, charNDArray); @@ -3610,6 +3607,11 @@ octave_idx_type, oct_mach_info::float_format); template octave_idx_type +octave_stream::write (const Array&, octave_idx_type, + oct_data_conv::data_type, + octave_idx_type, oct_mach_info::float_format); + +template octave_idx_type octave_stream::write (const Array&, octave_idx_type, oct_data_conv::data_type, octave_idx_type, oct_mach_info::float_format); diff -r 45f5faba05a2 -r 82be108cc558 src/ov-base.cc --- a/src/ov-base.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-base.cc Sun Apr 27 22:34:17 2008 +0200 @@ -426,6 +426,14 @@ return retval; } +float +octave_base_value::float_value (bool) const +{ + float retval = lo_ieee_float_nan_value (); + gripe_wrong_type_arg ("octave_base_value::float_value ()", type_name ()); + return retval; +} + Cell octave_base_value::cell_value () const { @@ -442,14 +450,30 @@ return retval; } +FloatMatrix +octave_base_value::float_matrix_value (bool) const +{ + FloatMatrix retval; + gripe_wrong_type_arg ("octave_base_value::float_matrix_value()", type_name ()); + return retval; +} + NDArray octave_base_value::array_value (bool) const { - NDArray retval; + FloatNDArray retval; gripe_wrong_type_arg ("octave_base_value::array_value()", type_name ()); return retval; } +FloatNDArray +octave_base_value::float_array_value (bool) const +{ + FloatNDArray retval; + gripe_wrong_type_arg ("octave_base_value::float_array_value()", type_name ()); + return retval; +} + Complex octave_base_value::complex_value (bool) const { @@ -459,6 +483,15 @@ return retval; } +FloatComplex +octave_base_value::float_complex_value (bool) const +{ + float tmp = lo_ieee_float_nan_value (); + FloatComplex retval (tmp, tmp); 
+ gripe_wrong_type_arg ("octave_base_value::float_complex_value()", type_name ()); + return retval; +} + ComplexMatrix octave_base_value::complex_matrix_value (bool) const { @@ -468,6 +501,15 @@ return retval; } +FloatComplexMatrix +octave_base_value::float_complex_matrix_value (bool) const +{ + FloatComplexMatrix retval; + gripe_wrong_type_arg ("octave_base_value::float_complex_matrix_value()", + type_name ()); + return retval; +} + ComplexNDArray octave_base_value::complex_array_value (bool) const { @@ -477,6 +519,15 @@ return retval; } +FloatComplexNDArray +octave_base_value::float_complex_array_value (bool) const +{ + FloatComplexNDArray retval; + gripe_wrong_type_arg ("octave_base_value::float_complex_array_value()", + type_name ()); + return retval; +} + bool octave_base_value::bool_value (bool) const { diff -r 45f5faba05a2 -r 82be108cc558 src/ov-base.h --- a/src/ov-base.h Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-base.h Sun Apr 27 22:34:17 2008 +0200 @@ -126,6 +126,10 @@ numeric_conversion_function (void) const { return static_cast (0); } + virtual type_conv_fcn + numeric_demotion_function (void) const + { return static_cast (0); } + virtual octave_value squeeze (void) const; virtual octave_base_value *try_narrowing_conversion (void) { return 0; } @@ -322,21 +326,36 @@ virtual double double_value (bool = false) const; + virtual float float_value (bool = false) const; + virtual double scalar_value (bool frc_str_conv = false) const { return double_value (frc_str_conv); } + virtual float float_scalar_value (bool frc_str_conv = false) const + { return float_value (frc_str_conv); } + virtual Cell cell_value (void) const; virtual Matrix matrix_value (bool = false) const; + virtual FloatMatrix float_matrix_value (bool = false) const; + virtual NDArray array_value (bool = false) const; + virtual FloatNDArray float_array_value (bool = false) const; + virtual Complex complex_value (bool = false) const; + virtual FloatComplex float_complex_value (bool = false) const; 
+ virtual ComplexMatrix complex_matrix_value (bool = false) const; + virtual FloatComplexMatrix float_complex_matrix_value (bool = false) const; + virtual ComplexNDArray complex_array_value (bool = false) const; + virtual FloatComplexNDArray float_complex_array_value (bool = false) const; + virtual bool bool_value (bool = false) const; virtual boolMatrix bool_matrix_value (bool = false) const; diff -r 45f5faba05a2 -r 82be108cc558 src/ov-bool-mat.cc --- a/src/ov-bool-mat.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-bool-mat.cc Sun Apr 27 22:34:17 2008 +0200 @@ -112,6 +112,24 @@ return retval; } +float +octave_bool_matrix::float_value (bool) const +{ + float retval = lo_ieee_float_nan_value (); + + if (rows () > 0 && columns () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "bool matrix", "real scalar"); + + retval = matrix (0, 0); + } + else + gripe_invalid_conversion ("bool matrix", "real scalar"); + + return retval; +} + Complex octave_bool_matrix::complex_value (bool) const { @@ -132,6 +150,26 @@ return retval; } +FloatComplex +octave_bool_matrix::float_complex_value (bool) const +{ + float tmp = lo_ieee_float_nan_value (); + + FloatComplex retval (tmp, tmp); + + if (rows () > 0 && columns () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "bool matrix", "complex scalar"); + + retval = matrix (0, 0); + } + else + gripe_invalid_conversion ("bool matrix", "complex scalar"); + + return retval; +} + octave_value octave_bool_matrix::convert_to_str_internal (bool pad, bool force, char type) const diff -r 45f5faba05a2 -r 82be108cc558 src/ov-bool-mat.h --- a/src/ov-bool-mat.h Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-bool-mat.h Sun Apr 27 22:34:17 2008 +0200 @@ -113,23 +113,39 @@ double double_value (bool = false) const; + float float_value (bool = false) const; + double scalar_value (bool frc_str_conv = false) const { return double_value (frc_str_conv); } Matrix matrix_value (bool = false) const { return Matrix 
(matrix.matrix_value ()); } + FloatMatrix float_matrix_value (bool = false) const + { return FloatMatrix (matrix.matrix_value ()); } + NDArray array_value (bool = false) const { return NDArray (matrix); } + FloatNDArray float_array_value (bool = false) const + { return FloatNDArray (matrix); } + Complex complex_value (bool = false) const; + FloatComplex float_complex_value (bool = false) const; + ComplexMatrix complex_matrix_value (bool = false) const { return ComplexMatrix (matrix.matrix_value ( )); } + FloatComplexMatrix float_complex_matrix_value (bool = false) const + { return FloatComplexMatrix (matrix.matrix_value ( )); } + ComplexNDArray complex_array_value (bool = false) const { return ComplexNDArray (matrix); } + FloatComplexNDArray float_complex_array_value (bool = false) const + { return FloatComplexNDArray (matrix); } + charNDArray char_array_value (bool = false) const { diff -r 45f5faba05a2 -r 82be108cc558 src/ov-bool.h --- a/src/ov-bool.h Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-bool.h Sun Apr 27 22:34:17 2008 +0200 @@ -121,22 +121,40 @@ double double_value (bool = false) const { return scalar; } + float float_value (bool = false) const { return scalar; } + double scalar_value (bool = false) const { return scalar; } + float float_scalar_value (bool = false) const { return scalar; } + Matrix matrix_value (bool = false) const { return Matrix (1, 1, scalar); } + FloatMatrix float_matrix_value (bool = false) const + { return FloatMatrix (1, 1, scalar); } + NDArray array_value (bool = false) const { return NDArray (dim_vector (1, 1), scalar); } + FloatNDArray float_array_value (bool = false) const + { return FloatNDArray (dim_vector (1, 1), scalar); } + Complex complex_value (bool = false) const { return scalar; } + FloatComplex float_complex_value (bool = false) const { return scalar; } + ComplexMatrix complex_matrix_value (bool = false) const - { return ComplexMatrix (1, 1, Complex (scalar)); } + { return ComplexMatrix (1, 1, Complex (scalar)); } + + 
FloatComplexMatrix float_complex_matrix_value (bool = false) const + { return FloatComplexMatrix (1, 1, FloatComplex (scalar)); } ComplexNDArray complex_array_value (bool = false) const { return ComplexNDArray (dim_vector (1, 1), Complex (scalar)); } + FloatComplexNDArray float_complex_array_value (bool = false) const + { return FloatComplexNDArray (dim_vector (1, 1), FloatComplex (scalar)); } + SparseMatrix sparse_matrix_value (bool = false) const { return SparseMatrix (Matrix (1, 1, scalar)); } diff -r 45f5faba05a2 -r 82be108cc558 src/ov-ch-mat.cc --- a/src/ov-ch-mat.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-ch-mat.cc Sun Apr 27 22:34:17 2008 +0200 @@ -80,6 +80,24 @@ return retval; } +float +octave_char_matrix::float_value (bool) const +{ + float retval = lo_ieee_float_nan_value (); + + if (rows () > 0 && columns () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "character matrix", "real scalar"); + + retval = matrix (0, 0); + } + else + gripe_invalid_conversion ("character matrix", "real scalar"); + + return retval; +} + Complex octave_char_matrix::complex_value (bool) const { @@ -100,6 +118,26 @@ return retval; } +FloatComplex +octave_char_matrix::float_complex_value (bool) const +{ + float tmp = lo_ieee_float_nan_value (); + + FloatComplex retval (tmp, tmp); + + if (rows () > 0 && columns () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "character matrix", "complex scalar"); + + retval = matrix (0, 0); + } + else + gripe_invalid_conversion ("character matrix", "complex scalar"); + + return retval; +} + void octave_char_matrix::print_raw (std::ostream& os, bool pr_as_read_syntax) const diff -r 45f5faba05a2 -r 82be108cc558 src/ov-ch-mat.h --- a/src/ov-ch-mat.h Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-ch-mat.h Sun Apr 27 22:34:17 2008 +0200 @@ -91,23 +91,42 @@ double double_value (bool = false) const; + float float_value (bool = false) const; + double scalar_value (bool frc_str_conv = false) const { return 
double_value (frc_str_conv); } + float float_scalar_value (bool frc_str_conv = false) const + { return float_value (frc_str_conv); } + Matrix matrix_value (bool = false) const { return Matrix (matrix.matrix_value ()); } + FloatMatrix float_matrix_value (bool = false) const + { return FloatMatrix (matrix.matrix_value ()); } + NDArray array_value (bool = false) const { return NDArray (matrix); } + FloatNDArray float_array_value (bool = false) const + { return FloatNDArray (matrix); } + Complex complex_value (bool = false) const; + FloatComplex float_complex_value (bool = false) const; + ComplexMatrix complex_matrix_value (bool = false) const { return ComplexMatrix (matrix.matrix_value ()); } + FloatComplexMatrix float_complex_matrix_value (bool = false) const + { return FloatComplexMatrix (matrix.matrix_value ()); } + ComplexNDArray complex_array_value (bool = false) const { return ComplexNDArray (matrix); } + FloatComplexNDArray float_complex_array_value (bool = false) const + { return FloatComplexNDArray (matrix); } + charMatrix char_matrix_value (bool = false) const { return matrix.matrix_value (); } diff -r 45f5faba05a2 -r 82be108cc558 src/ov-complex.cc --- a/src/ov-complex.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-complex.cc Sun Apr 27 22:34:17 2008 +0200 @@ -35,6 +35,7 @@ #include "oct-stream.h" #include "ops.h" #include "ov-complex.h" +#include "ov-flt-complex.h" #include "ov-base.h" #include "ov-base-scalar.h" #include "ov-base-scalar.cc" @@ -42,6 +43,7 @@ #include "ov-scalar.h" #include "gripes.h" #include "pr-output.h" +#include "ops.h" #include "ls-oct-ascii.h" #include "ls-hdf5.h" @@ -53,6 +55,20 @@ DEFINE_OV_TYPEID_FUNCTIONS_AND_DATA (octave_complex, "complex scalar", "double"); +static octave_base_value * +default_numeric_demotion_function (const octave_base_value& a) +{ + CAST_CONV_ARG (const octave_complex&); + + return new octave_float_complex (v.float_complex_value ()); +} + +octave_base_value::type_conv_fcn 
+octave_complex::numeric_demotion_function (void) const +{ + return default_numeric_demotion_function; +} + octave_base_value * octave_complex::try_narrowing_conversion (void) { @@ -107,6 +123,20 @@ return retval; } +float +octave_complex::float_value (bool force_conversion) const +{ + float retval = lo_ieee_float_nan_value (); + + if (! force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex scalar", "real scalar"); + + retval = std::real (scalar); + + return retval; +} + Matrix octave_complex::matrix_value (bool force_conversion) const { @@ -121,6 +151,20 @@ return retval; } +FloatMatrix +octave_complex::float_matrix_value (bool force_conversion) const +{ + FloatMatrix retval; + + if (! force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex scalar", "real matrix"); + + retval = FloatMatrix (1, 1, std::real (scalar)); + + return retval; +} + NDArray octave_complex::array_value (bool force_conversion) const { @@ -135,12 +179,31 @@ return retval; } +FloatNDArray +octave_complex::float_array_value (bool force_conversion) const +{ + FloatNDArray retval; + + if (! 
force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex scalar", "real matrix"); + + retval = FloatNDArray (dim_vector (1, 1), std::real (scalar)); + + return retval; +} + Complex octave_complex::complex_value (bool) const { return scalar; } +FloatComplex +octave_complex::float_complex_value (bool) const +{ + return static_cast (scalar); +} ComplexMatrix octave_complex::complex_matrix_value (bool) const @@ -148,12 +211,24 @@ return ComplexMatrix (1, 1, scalar); } +FloatComplexMatrix +octave_complex::float_complex_matrix_value (bool) const +{ + return FloatComplexMatrix (1, 1, static_cast (scalar)); +} + ComplexNDArray octave_complex::complex_array_value (bool /* force_conversion */) const { return ComplexNDArray (dim_vector (1, 1), scalar); } +FloatComplexNDArray +octave_complex::float_complex_array_value (bool /* force_conversion */) const +{ + return FloatComplexNDArray (dim_vector (1, 1), static_cast (scalar)); +} + octave_value octave_complex::resize (const dim_vector& dv, bool fill) const { diff -r 45f5faba05a2 -r 82be108cc558 src/ov-complex.h --- a/src/ov-complex.h Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-complex.h Sun Apr 27 22:34:17 2008 +0200 @@ -73,6 +73,8 @@ octave_base_value *empty_clone (void) const { return new octave_complex_matrix (); } + type_conv_fcn numeric_demotion_function (void) const; + octave_base_value *try_narrowing_conversion (void); octave_value do_index_op (const octave_value_list& idx, @@ -99,13 +101,22 @@ double double_value (bool = false) const; + float float_value (bool = false) const; + double scalar_value (bool frc_str_conv = false) const { return double_value (frc_str_conv); } + float float_scalar_value (bool frc_str_conv = false) const + { return float_value (frc_str_conv); } + Matrix matrix_value (bool = false) const; + FloatMatrix float_matrix_value (bool = false) const; + NDArray array_value (bool = false) const; + FloatNDArray float_array_value (bool = false) const; + SparseMatrix 
sparse_matrix_value (bool = false) const { return SparseMatrix (matrix_value ()); } @@ -116,10 +127,16 @@ Complex complex_value (bool = false) const; + FloatComplex float_complex_value (bool = false) const; + ComplexMatrix complex_matrix_value (bool = false) const; + FloatComplexMatrix float_complex_matrix_value (bool = false) const; + ComplexNDArray complex_array_value (bool = false) const; + FloatComplexNDArray float_complex_array_value (bool = false) const; + void increment (void) { scalar += 1.0; } void decrement (void) { scalar -= 1.0; } diff -r 45f5faba05a2 -r 82be108cc558 src/ov-cx-mat.cc --- a/src/ov-cx-mat.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-cx-mat.cc Sun Apr 27 22:34:17 2008 +0200 @@ -44,6 +44,7 @@ #include "ov-base-mat.cc" #include "ov-complex.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-re-mat.h" #include "ov-scalar.h" #include "pr-output.h" @@ -60,6 +61,20 @@ DEFINE_OV_TYPEID_FUNCTIONS_AND_DATA (octave_complex_matrix, "complex matrix", "double"); +static octave_base_value * +default_numeric_demotion_function (const octave_base_value& a) +{ + CAST_CONV_ARG (const octave_complex_matrix&); + + return new octave_float_complex_matrix (v.float_complex_matrix_value ()); +} + +octave_base_value::type_conv_fcn +octave_complex_matrix::numeric_demotion_function (void) const +{ + return default_numeric_demotion_function; +} + octave_base_value * octave_complex_matrix::try_narrowing_conversion (void) { @@ -142,6 +157,28 @@ return retval; } +float +octave_complex_matrix::float_value (bool force_conversion) const +{ + float retval = lo_ieee_float_nan_value (); + + if (! 
force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex matrix", "real scalar"); + + if (rows () > 0 && columns () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "complex matrix", "real scalar"); + + retval = std::real (matrix (0, 0)); + } + else + gripe_invalid_conversion ("complex matrix", "real scalar"); + + return retval; +} + Matrix octave_complex_matrix::matrix_value (bool force_conversion) const { @@ -156,6 +193,20 @@ return retval; } +FloatMatrix +octave_complex_matrix::float_matrix_value (bool force_conversion) const +{ + FloatMatrix retval; + + if (! force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex matrix", "real matrix"); + + retval = ::real (matrix.matrix_value ()); + + return retval; +} + Complex octave_complex_matrix::complex_value (bool) const { @@ -176,12 +227,38 @@ return retval; } +FloatComplex +octave_complex_matrix::float_complex_value (bool) const +{ + float tmp = lo_ieee_float_nan_value (); + + FloatComplex retval (tmp, tmp); + + if (rows () > 0 && columns () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "complex matrix", "complex scalar"); + + retval = matrix (0, 0); + } + else + gripe_invalid_conversion ("complex matrix", "complex scalar"); + + return retval; +} + ComplexMatrix octave_complex_matrix::complex_matrix_value (bool) const { return matrix.matrix_value (); } +FloatComplexMatrix +octave_complex_matrix::float_complex_matrix_value (bool) const +{ + return FloatComplexMatrix (matrix.matrix_value ()); +} + charNDArray octave_complex_matrix::char_array_value (bool frc_str_conv) const { @@ -202,6 +279,12 @@ return retval; } +FloatComplexNDArray +octave_complex_matrix::float_complex_array_value (bool) const +{ + return FloatComplexNDArray (matrix); +} + SparseMatrix octave_complex_matrix::sparse_matrix_value (bool force_conversion) const { diff -r 45f5faba05a2 -r 82be108cc558 src/ov-cx-mat.h --- a/src/ov-cx-mat.h Wed May 14 18:09:56 2008 +0200 
+++ b/src/ov-cx-mat.h Sun Apr 27 22:34:17 2008 +0200 @@ -86,6 +86,8 @@ octave_base_value *clone (void) const { return new octave_complex_matrix (*this); } octave_base_value *empty_clone (void) const { return new octave_complex_matrix (); } + type_conv_fcn numeric_demotion_function (void) const; + octave_base_value *try_narrowing_conversion (void); void assign (const octave_value_list& idx, const ComplexNDArray& rhs); @@ -104,17 +106,30 @@ double double_value (bool = false) const; + float float_value (bool = false) const; + double scalar_value (bool frc_str_conv = false) const { return double_value (frc_str_conv); } + float float_scalar_value (bool frc_str_conv = false) const + { return float_value (frc_str_conv); } + Matrix matrix_value (bool = false) const; + FloatMatrix float_matrix_value (bool = false) const; + Complex complex_value (bool = false) const; + FloatComplex float_complex_value (bool = false) const; + ComplexMatrix complex_matrix_value (bool = false) const; + FloatComplexMatrix float_complex_matrix_value (bool = false) const; + ComplexNDArray complex_array_value (bool = false) const { return matrix; } + FloatComplexNDArray float_complex_array_value (bool = false) const; + charNDArray char_array_value (bool frc_str_conv = false) const; SparseMatrix sparse_matrix_value (bool = false) const; diff -r 45f5faba05a2 -r 82be108cc558 src/ov-float.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ov-float.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,356 @@ +/* + +Copyright (C) 1996, 1997, 1998, 2000, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. 
+ +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "data-conv.h" +#include "mach-info.h" +#include "lo-specfun.h" +#include "lo-mappers.h" + +#include "defun.h" +#include "gripes.h" +#include "oct-obj.h" +#include "oct-stream.h" +#include "ov-scalar.h" +#include "ov-float.h" +#include "ov-base.h" +#include "ov-base-scalar.h" +#include "ov-base-scalar.cc" +#include "ov-flt-re-mat.h" +#include "ov-typeinfo.h" +#include "pr-output.h" +#include "xdiv.h" +#include "xpow.h" +#include "ops.h" + +#include "ls-oct-ascii.h" +#include "ls-hdf5.h" + +template class octave_base_scalar; + +DEFINE_OCTAVE_ALLOCATOR (octave_float_scalar); + +DEFINE_OV_TYPEID_FUNCTIONS_AND_DATA (octave_float_scalar, "float scalar", "single"); + +octave_value +octave_float_scalar::do_index_op (const octave_value_list& idx, bool resize_ok) +{ + octave_value retval; + + if (idx.valid_scalar_indices ()) + retval = scalar; + else + { + // FIXME -- this doesn't solve the problem of + // + // a = 1; a([1,1], [1,1], [1,1]) + // + // and similar constructions. Hmm... + + // FIXME -- using this constructor avoids narrowing the + // 1x1 matrix back to a scalar value. Need a better solution + // to this problem. 
+ + octave_value tmp (new octave_matrix (matrix_value ())); + + retval = tmp.do_index_op (idx, resize_ok); + } + + return retval; +} + +std::streamoff +octave_float_scalar::streamoff_value (void) const +{ + std::streamoff retval (-1); + + if (D_NINT (scalar) == scalar) + retval = std::streamoff (static_cast (scalar)); + else + error ("conversion to streamoff value failed"); + + return retval; +} + +streamoff_array +octave_float_scalar::streamoff_array_value (void) const +{ + streamoff_array retval; + + std::streamoff soff = streamoff_value (); + + if (! error_state) + retval = streamoff_array (dim_vector (1, 1), soff); + + return retval; +} + +octave_value +octave_float_scalar::resize (const dim_vector& dv, bool fill) const +{ + if (fill) + { + NDArray retval (dv, NDArray::resize_fill_value()); + + if (dv.numel ()) + retval(0) = scalar; + + return retval; + } + else + { + NDArray retval (dv); + + if (dv.numel ()) + retval(0) = scalar; + + return retval; + } +} + +octave_value +octave_float_scalar::convert_to_str_internal (bool, bool, char type) const +{ + octave_value retval; + + if (xisnan (scalar)) + ::error ("invalid conversion from NaN to character"); + else + { + int ival = NINT (scalar); + + if (ival < 0 || ival > UCHAR_MAX) + { + // FIXME -- is there something better we could do? 
+ + ival = 0; + + ::warning ("range error for conversion to character value"); + } + + retval = octave_value (std::string (1, static_cast (ival)), type); + } + + return retval; +} + +bool +octave_float_scalar::save_ascii (std::ostream& os) +{ + float d = float_value (); + + octave_write_float (os, d); + + os << "\n"; + + return true; +} + +bool +octave_float_scalar::load_ascii (std::istream& is) +{ + scalar = octave_read_float (is); + if (!is) + { + error ("load: failed to load scalar constant"); + return false; + } + + return true; +} + +bool +octave_float_scalar::save_binary (std::ostream& os, bool& /* save_as_floats */) +{ + char tmp = LS_FLOAT; + os.write (reinterpret_cast (&tmp), 1); + float dtmp = float_value (); + os.write (reinterpret_cast (&dtmp), 4); + + return true; +} + +bool +octave_float_scalar::load_binary (std::istream& is, bool swap, + oct_mach_info::float_format fmt) +{ + char tmp; + if (! is.read (reinterpret_cast (&tmp), 1)) + return false; + + float dtmp; + read_floats (is, &dtmp, static_cast (tmp), 1, swap, fmt); + if (error_state || ! 
is) + return false; + + scalar = dtmp; + return true; +} + +#if defined (HAVE_HDF5) + +bool +octave_float_scalar::save_hdf5 (hid_t loc_id, const char *name, + bool /* save_as_floats */) +{ + hsize_t dimens[3]; + hid_t space_hid = -1, data_hid = -1; + bool retval = true; + + space_hid = H5Screate_simple (0, dimens, 0); + if (space_hid < 0) return false; + + data_hid = H5Dcreate (loc_id, name, H5T_NATIVE_FLOAT, space_hid, + H5P_DEFAULT); + if (data_hid < 0) + { + H5Sclose (space_hid); + return false; + } + + float tmp = float_value (); + retval = H5Dwrite (data_hid, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, + H5P_DEFAULT, &tmp) >= 0; + + H5Dclose (data_hid); + H5Sclose (space_hid); + + return retval; +} + +bool +octave_float_scalar::load_hdf5 (hid_t loc_id, const char *name, + bool /* have_h5giterate_bug */) +{ + hid_t data_hid = H5Dopen (loc_id, name); + hid_t space_id = H5Dget_space (data_hid); + + hsize_t rank = H5Sget_simple_extent_ndims (space_id); + + if (rank != 0) + { + H5Dclose (data_hid); + return false; + } + + float dtmp; + if (H5Dread (data_hid, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, + H5P_DEFAULT, &dtmp) < 0) + { + H5Dclose (data_hid); + return false; + } + + scalar = dtmp; + + H5Dclose (data_hid); + + return true; +} + +#endif + +mxArray * +octave_float_scalar::as_mxArray (void) const +{ + mxArray *retval = new mxArray (mxSINGLE_CLASS, 1, 1, mxREAL); + + float *pr = static_cast (retval->get_data ()); + + pr[0] = scalar; + + return retval; +} + +#define SCALAR_MAPPER(MAP, FCN) \ + octave_value \ + octave_float_scalar::MAP (void) const \ + { \ + return octave_value (FCN (scalar)); \ + } + +#define CD_SCALAR_MAPPER(MAP, RFCN, CFCN, L1, L2) \ + octave_value \ + octave_float_scalar::MAP (void) const \ + { \ + return (scalar < L1 || scalar > L2 \ + ? 
octave_value (CFCN (Complex (scalar))) \ + : octave_value (RFCN (scalar))); \ + } + +static float +xconj (float x) +{ + return x; +} + +SCALAR_MAPPER (erf, ::erf) +SCALAR_MAPPER (erfc, ::erfc) +SCALAR_MAPPER (gamma, xgamma) +CD_SCALAR_MAPPER (lgamma, xlgamma, xlgamma, 0.0, octave_Inf) +SCALAR_MAPPER (abs, ::fabs) +SCALAR_MAPPER (acos, ::acos) +CD_SCALAR_MAPPER (acosh, ::acosh, ::acosh, 1.0, octave_Inf) +SCALAR_MAPPER (angle, ::arg) +SCALAR_MAPPER (arg, ::arg) +CD_SCALAR_MAPPER (asin, ::asin, ::asin, -1.0, 1.0) +SCALAR_MAPPER (asinh, ::asinh) +SCALAR_MAPPER (atan, ::atan) +CD_SCALAR_MAPPER (atanh, ::atanh, ::atanh, -1.0, 1.0) +SCALAR_MAPPER (ceil, ::ceil) +SCALAR_MAPPER (conj, xconj) +SCALAR_MAPPER (cos, ::cos) +SCALAR_MAPPER (cosh, ::cosh) +SCALAR_MAPPER (exp, ::exp) +SCALAR_MAPPER (expm1, ::expm1) +SCALAR_MAPPER (fix, ::fix) +SCALAR_MAPPER (floor, ::floor) +SCALAR_MAPPER (imag, ::imag) +CD_SCALAR_MAPPER (log, ::log, std::log, 0.0, octave_Inf) +CD_SCALAR_MAPPER (log2, xlog2, xlog2, 0.0, octave_Inf) +CD_SCALAR_MAPPER (log10, ::log10, std::log10, 0.0, octave_Inf) +CD_SCALAR_MAPPER (log1p, ::log1p, ::log1p, -1.0, octave_Inf) +SCALAR_MAPPER (real, ::real) +SCALAR_MAPPER (round, xround) +SCALAR_MAPPER (roundb, xroundb) +SCALAR_MAPPER (signum, ::signum) +SCALAR_MAPPER (sin, ::sin) +SCALAR_MAPPER (sinh, ::sinh) +CD_SCALAR_MAPPER (sqrt, ::sqrt, std::sqrt, 0.0, octave_Inf) +SCALAR_MAPPER (tan, ::tan) +SCALAR_MAPPER (tanh, ::tanh) +SCALAR_MAPPER (finite, xfinite) +SCALAR_MAPPER (isinf, xisinf) +SCALAR_MAPPER (isna, octave_is_NA) +SCALAR_MAPPER (isnan, xisnan) + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/ov-float.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ov-float.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,297 @@ +/* + +Copyright (C) 1996, 1997, 1998, 2000, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. 
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_float_h) +#define octave_float_h 1 + +#include + +#include +#include + +#include "lo-ieee.h" +#include "lo-mappers.h" +#include "lo-utils.h" +#include "mx-base.h" +#include "oct-alloc.h" +#include "str-vec.h" + +#include "gripes.h" +#include "ov-base.h" +#include "ov-re-mat.h" +#include "ov-flt-re-mat.h" +#include "ov-base-scalar.h" +#include "ov-typeinfo.h" + +class Octave_map; +class octave_value_list; + +class tree_walker; + +// Real scalar values. + +class +OCTINTERP_API +octave_float_scalar : public octave_base_scalar +{ +public: + + octave_float_scalar (void) + : octave_base_scalar (0.0) { } + + octave_float_scalar (float d) + : octave_base_scalar (d) { } + + octave_float_scalar (const octave_float_scalar& s) + : octave_base_scalar (s) { } + + ~octave_float_scalar (void) { } + + octave_base_value *clone (void) const { return new octave_float_scalar (*this); } + + // We return an octave_matrix here instead of an octave_float_scalar so + // that in expressions like A(2,2,2) = 2 (for A previously + // undefined), A will be empty instead of a 1x1 object. 
+ octave_base_value *empty_clone (void) const { return new octave_matrix (); } + + octave_value do_index_op (const octave_value_list& idx, + bool resize_ok = false); + + idx_vector index_vector (void) const { return idx_vector (scalar); } + + octave_value any (int = 0) const + { return (scalar != 0 && ! lo_ieee_isnan (scalar)); } + + bool is_real_scalar (void) const { return true; } + + bool is_real_type (void) const { return true; } + + bool is_single_type (void) const { return true; } + + bool is_float_type (void) const { return true; } + + bool valid_as_scalar_index (void) const + { + return (! xisnan (scalar) + && F_NINT (scalar) == scalar + && NINTbig (scalar) == 1); + } + + bool valid_as_zero_index (void) const + { + return (! xisnan (scalar) + && F_NINT (scalar) == scalar + && NINTbig (scalar) == 0); + } + + int8NDArray + int8_array_value (void) const + { return int8NDArray (dim_vector (1, 1), scalar); } + + int16NDArray + int16_array_value (void) const + { return int16NDArray (dim_vector (1, 1), scalar); } + + int32NDArray + int32_array_value (void) const + { return int32NDArray (dim_vector (1, 1), scalar); } + + int64NDArray + int64_array_value (void) const + { return int64NDArray (dim_vector (1, 1), scalar); } + + uint8NDArray + uint8_array_value (void) const + { return uint8NDArray (dim_vector (1, 1), scalar); } + + uint16NDArray + uint16_array_value (void) const + { return uint16NDArray (dim_vector (1, 1), scalar); } + + uint32NDArray + uint32_array_value (void) const + { return uint32NDArray (dim_vector (1, 1), scalar); } + + uint64NDArray + uint64_array_value (void) const + { return uint64NDArray (dim_vector (1, 1), scalar); } + + double double_value (bool = false) const { return static_cast (scalar); } + + float float_value (bool = false) const { return scalar; } + + double scalar_value (bool = false) const { return static_cast (scalar); } + + float float_scalar_value (bool = false) const { return scalar; } + + Matrix matrix_value (bool = false) 
const + { return Matrix (1, 1, scalar); } + + FloatMatrix float_matrix_value (bool = false) const + { return FloatMatrix (1, 1, scalar); } + + NDArray array_value (bool = false) const + { return NDArray (dim_vector (1, 1), scalar); } + + FloatNDArray float_array_value (bool = false) const + { return FloatNDArray (dim_vector (1, 1), scalar); } + + SparseMatrix sparse_matrix_value (bool = false) const + { return SparseMatrix (Matrix (1, 1, scalar)); } + + // XXX FIXME XXX Need SparseComplexMatrix (Matrix) constructor!!! + SparseComplexMatrix sparse_complex_matrix_value (bool = false) const + { return SparseComplexMatrix (sparse_matrix_value ()); } + + octave_value resize (const dim_vector& dv, bool fill = false) const; + + Complex complex_value (bool = false) const { return scalar; } + + FloatComplex float_complex_value (bool = false) const { return scalar; } + + ComplexMatrix complex_matrix_value (bool = false) const + { return ComplexMatrix (1, 1, Complex (scalar)); } + + FloatComplexMatrix float_complex_matrix_value (bool = false) const + { return FloatComplexMatrix (1, 1, FloatComplex (scalar)); } + + ComplexNDArray complex_array_value (bool = false) const + { return ComplexNDArray (dim_vector (1, 1), Complex (scalar)); } + + FloatComplexNDArray float_complex_array_value (bool = false) const + { return FloatComplexNDArray (dim_vector (1, 1), FloatComplex (scalar)); } + + charNDArray + char_array_value (bool = false) const + { + charNDArray retval (dim_vector (1, 1)); + retval(0) = static_cast (scalar); + return retval; + } + + bool bool_value (bool warn = false) const + { + if (warn && scalar != 0 && scalar != 1) + gripe_logical_conversion (); + + return scalar; + } + + boolNDArray bool_array_value (bool warn = false) const + { + if (warn && scalar != 0 && scalar != 1) + gripe_logical_conversion (); + + return boolNDArray (dim_vector (1, 1), scalar); + } + + std::streamoff streamoff_value (void) const; + + streamoff_array streamoff_array_value (void) const; + + 
octave_value convert_to_str_internal (bool pad, bool force, char type) const; + + void increment (void) { ++scalar; } + + void decrement (void) { --scalar; } + + bool save_ascii (std::ostream& os); + + bool load_ascii (std::istream& is); + + bool save_binary (std::ostream& os, bool& save_as_floats); + + bool load_binary (std::istream& is, bool swap, + oct_mach_info::float_format fmt); + +#if defined (HAVE_HDF5) + bool save_hdf5 (hid_t loc_id, const char *name, bool save_as_floats); + + bool load_hdf5 (hid_t loc_id, const char *name, bool have_h5giterate_bug); +#endif + + int write (octave_stream& os, int block_size, + oct_data_conv::data_type output_type, int skip, + oct_mach_info::float_format flt_fmt) const + { + return os.write (array_value (), block_size, output_type, + skip, flt_fmt); + } + + mxArray *as_mxArray (void) const; + + octave_value erf (void) const; + octave_value erfc (void) const; + octave_value gamma (void) const; + octave_value lgamma (void) const; + octave_value abs (void) const; + octave_value acos (void) const; + octave_value acosh (void) const; + octave_value angle (void) const; + octave_value arg (void) const; + octave_value asin (void) const; + octave_value asinh (void) const; + octave_value atan (void) const; + octave_value atanh (void) const; + octave_value ceil (void) const; + octave_value conj (void) const; + octave_value cos (void) const; + octave_value cosh (void) const; + octave_value exp (void) const; + octave_value expm1 (void) const; + octave_value fix (void) const; + octave_value floor (void) const; + octave_value imag (void) const; + octave_value log (void) const; + octave_value log2 (void) const; + octave_value log10 (void) const; + octave_value log1p (void) const; + octave_value real (void) const; + octave_value round (void) const; + octave_value roundb (void) const; + octave_value signum (void) const; + octave_value sin (void) const; + octave_value sinh (void) const; + octave_value sqrt (void) const; + octave_value tan 
(void) const; + octave_value tanh (void) const; + octave_value finite (void) const; + octave_value isinf (void) const; + octave_value isna (void) const; + octave_value isnan (void) const; + +private: + octave_value map (float (*fcn) (float)) const; + + DECLARE_OCTAVE_ALLOCATOR + + DECLARE_OV_TYPEID_FUNCTIONS_AND_DATA +}; + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/ov-flt-complex.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ov-flt-complex.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,498 @@ +/* + +Copyright (C) 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "lo-ieee.h" +#include "lo-specfun.h" +#include "lo-mappers.h" + +#include "oct-obj.h" +#include "oct-stream.h" +#include "ops.h" +#include "ov-complex.h" +#include "ov-base.h" +#include "ov-base-scalar.h" +#include "ov-base-scalar.cc" +#include "ov-flt-cx-mat.h" +#include "ov-float.h" +#include "ov-flt-complex.h" +#include "gripes.h" +#include "pr-output.h" +#include "ops.h" + +#include "ls-oct-ascii.h" +#include "ls-hdf5.h" + +template class octave_base_scalar; + +DEFINE_OCTAVE_ALLOCATOR (octave_float_complex); + +DEFINE_OV_TYPEID_FUNCTIONS_AND_DATA (octave_float_complex, + "float complex scalar", "single"); + +octave_base_value * +octave_float_complex::try_narrowing_conversion (void) +{ + octave_base_value *retval = 0; + + float im = std::imag (scalar); + + if (im == 0.0 && ! lo_ieee_signbit (im)) + retval = new octave_float_scalar (std::real (scalar)); + + return retval; +} + +octave_value +octave_float_complex::do_index_op (const octave_value_list& idx, bool resize_ok) +{ + octave_value retval; + + if (idx.valid_scalar_indices ()) + retval = scalar; + else + { + // FIXME -- this doesn't solve the problem of + // + // a = i; a([1,1], [1,1], [1,1]) + // + // and similar constructions. Hmm... + + // FIXME -- using this constructor avoids narrowing the + // 1x1 matrix back to a scalar value. Need a better solution + // to this problem. + + octave_value tmp (new octave_float_complex_matrix (float_complex_matrix_value ())); + + retval = tmp.do_index_op (idx, resize_ok); + } + + return retval; +} + +double +octave_float_complex::double_value (bool force_conversion) const +{ + double retval = lo_ieee_nan_value (); + + if (! 
force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex scalar", "real scalar"); + + retval = std::real (scalar); + + return retval; +} + +float +octave_float_complex::float_value (bool force_conversion) const +{ + float retval = lo_ieee_float_nan_value (); + + if (! force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex scalar", "real scalar"); + + retval = std::real (scalar); + + return retval; +} + +Matrix +octave_float_complex::matrix_value (bool force_conversion) const +{ + Matrix retval; + + if (! force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex scalar", "real matrix"); + + retval = Matrix (1, 1, std::real (scalar)); + + return retval; +} + +FloatMatrix +octave_float_complex::float_matrix_value (bool force_conversion) const +{ + FloatMatrix retval; + + if (! force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex scalar", "real matrix"); + + retval = FloatMatrix (1, 1, std::real (scalar)); + + return retval; +} + +NDArray +octave_float_complex::array_value (bool force_conversion) const +{ + NDArray retval; + + if (! force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex scalar", "real matrix"); + + retval = NDArray (dim_vector (1, 1), std::real (scalar)); + + return retval; +} + +FloatNDArray +octave_float_complex::float_array_value (bool force_conversion) const +{ + FloatNDArray retval; + + if (! 
force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex scalar", "real matrix"); + + retval = FloatNDArray (dim_vector (1, 1), std::real (scalar)); + + return retval; +} + +Complex +octave_float_complex::complex_value (bool) const +{ + return scalar; +} + +FloatComplex +octave_float_complex::float_complex_value (bool) const +{ + return static_cast (scalar); +} + +ComplexMatrix +octave_float_complex::complex_matrix_value (bool) const +{ + return ComplexMatrix (1, 1, scalar); +} + +FloatComplexMatrix +octave_float_complex::float_complex_matrix_value (bool) const +{ + return FloatComplexMatrix (1, 1, scalar); +} + +ComplexNDArray +octave_float_complex::complex_array_value (bool /* force_conversion */) const +{ + return ComplexNDArray (dim_vector (1, 1), scalar); +} + +FloatComplexNDArray +octave_float_complex::float_complex_array_value (bool /* force_conversion */) const +{ + return FloatComplexNDArray (dim_vector (1, 1), scalar); +} + +octave_value +octave_float_complex::resize (const dim_vector& dv, bool fill) const +{ + if (fill) + { + FloatComplexNDArray retval (dv, FloatComplexNDArray::resize_fill_value ()); + + if (dv.numel ()) + retval(0) = scalar; + + return retval; + } + else + { + FloatComplexNDArray retval (dv); + + if (dv.numel ()) + retval(0) = scalar; + + return retval; + } +} + +bool +octave_float_complex::save_ascii (std::ostream& os) +{ + FloatComplex c = float_complex_value (); + + octave_write_float_complex (os, c); + + os << "\n"; + + return true; +} + +bool +octave_float_complex::load_ascii (std::istream& is) +{ + scalar = octave_read_float_complex (is); + + if (!is) + { + error ("load: failed to load complex scalar constant"); + return false; + } + + return true; +} + + +bool +octave_float_complex::save_binary (std::ostream& os, bool& /* save_as_floats */) +{ + char tmp = static_cast (LS_FLOAT); + os.write (reinterpret_cast (&tmp), 1); + FloatComplex ctmp = float_complex_value (); + os.write (reinterpret_cast (&ctmp), 8); 
+ + return true; +} + +bool +octave_float_complex::load_binary (std::istream& is, bool swap, + oct_mach_info::float_format fmt) +{ + char tmp; + if (! is.read (reinterpret_cast (&tmp), 1)) + return false; + + FloatComplex ctmp; + read_floats (is, reinterpret_cast (&ctmp), + static_cast (tmp), 2, swap, fmt); + if (error_state || ! is) + return false; + + scalar = ctmp; + return true; +} + +#if defined (HAVE_HDF5) + +bool +octave_float_complex::save_hdf5 (hid_t loc_id, const char *name, + bool /* save_as_floats */) +{ + hsize_t dimens[3]; + hid_t space_hid = -1, type_hid = -1, data_hid = -1; + bool retval = true; + + space_hid = H5Screate_simple (0, dimens, 0); + if (space_hid < 0) + return false; + + type_hid = hdf5_make_complex_type (H5T_NATIVE_FLOAT); + if (type_hid < 0) + { + H5Sclose (space_hid); + return false; + } + + data_hid = H5Dcreate (loc_id, name, type_hid, space_hid, H5P_DEFAULT); + if (data_hid < 0) + { + H5Sclose (space_hid); + H5Tclose (type_hid); + return false; + } + + FloatComplex tmp = float_complex_value (); + retval = H5Dwrite (data_hid, type_hid, H5S_ALL, H5S_ALL, H5P_DEFAULT, + &tmp) >= 0; + + H5Dclose (data_hid); + H5Tclose (type_hid); + H5Sclose (space_hid); + + return retval; +} + +bool +octave_float_complex::load_hdf5 (hid_t loc_id, const char *name, + bool /* have_h5giterate_bug */) +{ + bool retval = false; + hid_t data_hid = H5Dopen (loc_id, name); + hid_t type_hid = H5Dget_type (data_hid); + + hid_t complex_type = hdf5_make_complex_type (H5T_NATIVE_FLOAT); + + if (! 
hdf5_types_compatible (type_hid, complex_type)) + { + H5Tclose (complex_type); + H5Dclose (data_hid); + return false; + } + + hid_t space_id = H5Dget_space (data_hid); + hsize_t rank = H5Sget_simple_extent_ndims (space_id); + + if (rank != 0) + { + H5Tclose (complex_type); + H5Sclose (space_id); + H5Dclose (data_hid); + return false; + } + + // complex scalar: + FloatComplex ctmp; + if (H5Dread (data_hid, complex_type, H5S_ALL, H5S_ALL, H5P_DEFAULT, + &ctmp) >= 0) + { + retval = true; + scalar = ctmp; + } + + H5Tclose (complex_type); + H5Sclose (space_id); + H5Dclose (data_hid); + + return retval; +} + +#endif + +mxArray * +octave_float_complex::as_mxArray (void) const +{ + mxArray *retval = new mxArray (mxSINGLE_CLASS, 1, 1, mxCOMPLEX); + + float *pr = static_cast (retval->get_data ()); + float *pi = static_cast (retval->get_imag_data ()); + + pr[0] = std::real (scalar); + pi[0] = std::imag (scalar); + + return retval; +} + +static float +xabs (const FloatComplex& x) +{ + return (xisinf (x.real ()) || xisinf (x.imag ())) ? octave_Inf : abs (x); +} + +static float +ximag (const FloatComplex& x) +{ + return x.imag (); +} + +static float +xreal (const FloatComplex& x) +{ + return x.real (); +} + +#define COMPLEX_MAPPER(MAP, FCN) \ + octave_value \ + octave_float_complex::MAP (void) const \ + { \ + return octave_value (FCN (scalar)); \ + } + +#define SCALAR_MAPPER(MAP, FCN) \ + octave_value \ + octave_float_complex::MAP (void) const \ + { \ + if (scalar.imag () == 0) \ + return octave_value (FCN (scalar.real ())); \ + else \ + { \ + error ("%s: not defined for complex arguments", #MAP); \ + return octave_value (); \ + } \ + } + +#define CD_SCALAR_MAPPER(MAP, RFCN, CFCN, L1, L2) \ + octave_value \ + octave_float_complex::MAP (void) const \ + { \ + if (scalar.imag () == 0) \ + { \ + float re = scalar.real (); \ + return (re < L1 || re > L2 \ + ? 
octave_value (CFCN (scalar)) \ + : octave_value (RFCN (re))); \ + } \ + else \ + { \ + error ("%s: not defined for complex arguments", #MAP); \ + return octave_value (); \ + } \ + } + +SCALAR_MAPPER (erf, ::erf) +SCALAR_MAPPER (erfc, ::erfc) +SCALAR_MAPPER (gamma, xgamma) +CD_SCALAR_MAPPER (lgamma, xlgamma, xlgamma, 0.0, octave_Inf) + +COMPLEX_MAPPER (abs, xabs) +COMPLEX_MAPPER (acos, ::acos) +COMPLEX_MAPPER (acosh, ::acosh) +COMPLEX_MAPPER (angle, std::arg) +COMPLEX_MAPPER (arg, std::arg) +COMPLEX_MAPPER (asin, ::asin) +COMPLEX_MAPPER (asinh, ::asinh) +COMPLEX_MAPPER (atan, ::atan) +COMPLEX_MAPPER (atanh, ::atanh) +COMPLEX_MAPPER (ceil, ::ceil) +COMPLEX_MAPPER (conj, std::conj) +COMPLEX_MAPPER (cos, std::cos) +COMPLEX_MAPPER (cosh, std::cosh) +COMPLEX_MAPPER (exp, std::exp) +COMPLEX_MAPPER (expm1, ::expm1) +COMPLEX_MAPPER (fix, ::fix) +COMPLEX_MAPPER (floor, ::floor) +COMPLEX_MAPPER (imag, ximag) +COMPLEX_MAPPER (log, std::log) +COMPLEX_MAPPER (log2, xlog2) +COMPLEX_MAPPER (log10, std::log10) +COMPLEX_MAPPER (log1p, ::log1p) +COMPLEX_MAPPER (real, xreal) +COMPLEX_MAPPER (round, xround) +COMPLEX_MAPPER (roundb, xroundb) +COMPLEX_MAPPER (signum, ::signum) +COMPLEX_MAPPER (sin, std::sin) +COMPLEX_MAPPER (sinh, std::sinh) +COMPLEX_MAPPER (sqrt, std::sqrt) +COMPLEX_MAPPER (tan, std::tan) +COMPLEX_MAPPER (tanh, std::tanh) +COMPLEX_MAPPER (finite, xfinite) +COMPLEX_MAPPER (isinf, xisinf) +COMPLEX_MAPPER (isna, octave_is_NA) +COMPLEX_MAPPER (isnan, xisnan) + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/ov-flt-complex.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ov-flt-complex.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,223 @@ +/* + +Copyright (C) 1996, 1997, 1998, 2000, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. 
+ +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_float_complex_h) +#define octave_float_complex_h 1 + +#include + +#include +#include + +#include "lo-ieee.h" +#include "mx-base.h" +#include "oct-alloc.h" +#include "str-vec.h" + +#include "error.h" +#include "ov-base.h" +#include "ov-flt-cx-mat.h" +#include "ov-base-scalar.h" +#include "ov-typeinfo.h" + +class Octave_map; +class octave_value_list; + +class tree_walker; + +// Complex scalar values. + +class +OCTINTERP_API +octave_float_complex : public octave_base_scalar +{ +public: + + octave_float_complex (void) + : octave_base_scalar () { } + + octave_float_complex (const FloatComplex& c) + : octave_base_scalar (c) { } + + octave_float_complex (const octave_float_complex& c) + : octave_base_scalar (c) { } + + ~octave_float_complex (void) { } + + octave_base_value *clone (void) const { return new octave_float_complex (*this); } + + // We return an octave_float_complex_matrix object here instead of an + // octave_float_complex object so that in expressions like A(2,2,2) = 2 + // (for A previously undefined), A will be empty instead of a 1x1 + // object. 
+ octave_base_value *empty_clone (void) const + { return new octave_float_complex_matrix (); } + + octave_base_value *try_narrowing_conversion (void); + + octave_value do_index_op (const octave_value_list& idx, + bool resize_ok = false); + + octave_value any (int = 0) const + { + return (scalar != FloatComplex (0, 0) + && ! (lo_ieee_isnan (std::real (scalar)) + || lo_ieee_isnan (std::imag (scalar)))); + } + + bool is_complex_scalar (void) const { return true; } + + bool is_complex_type (void) const { return true; } + + bool is_single_type (void) const { return true; } + + bool is_float_type (void) const { return true; } + + // FIXME ??? + bool valid_as_scalar_index (void) const { return false; } + bool valid_as_zero_index (void) const { return false; } + + double double_value (bool = false) const; + + float float_value (bool = false) const; + + double scalar_value (bool frc_str_conv = false) const + { return double_value (frc_str_conv); } + + float float_scalar_value (bool frc_str_conv = false) const + { return float_value (frc_str_conv); } + + Matrix matrix_value (bool = false) const; + + FloatMatrix float_matrix_value (bool = false) const; + + NDArray array_value (bool = false) const; + + FloatNDArray float_array_value (bool = false) const; + + SparseMatrix sparse_matrix_value (bool = false) const + { return SparseMatrix (matrix_value ()); } + + SparseComplexMatrix sparse_complex_matrix_value (bool = false) const + { return SparseComplexMatrix (complex_matrix_value ()); } + + octave_value resize (const dim_vector& dv, bool fill = false) const; + + Complex complex_value (bool = false) const; + + FloatComplex float_complex_value (bool = false) const; + + ComplexMatrix complex_matrix_value (bool = false) const; + + FloatComplexMatrix float_complex_matrix_value (bool = false) const; + + ComplexNDArray complex_array_value (bool = false) const; + + FloatComplexNDArray float_complex_array_value (bool = false) const; + + void increment (void) { scalar += 1.0; } + + void 
decrement (void) { scalar -= 1.0; } + + bool save_ascii (std::ostream& os); + + bool load_ascii (std::istream& is); + + bool save_binary (std::ostream& os, bool& save_as_floats); + + bool load_binary (std::istream& is, bool swap, + oct_mach_info::float_format fmt); + +#if defined (HAVE_HDF5) + bool save_hdf5 (hid_t loc_id, const char *name, bool save_as_floats); + + bool load_hdf5 (hid_t loc_id, const char *name, bool have_h5giterate_bug); +#endif + + int write (octave_stream& os, int block_size, + oct_data_conv::data_type output_type, int skip, + oct_mach_info::float_format flt_fmt) const + { + // Yes, for compatibility, we drop the imaginary part here. + return os.write (array_value (true), block_size, output_type, + skip, flt_fmt); + } + + mxArray *as_mxArray (void) const; + + octave_value erf (void) const; + octave_value erfc (void) const; + octave_value gamma (void) const; + octave_value lgamma (void) const; + octave_value abs (void) const; + octave_value acos (void) const; + octave_value acosh (void) const; + octave_value angle (void) const; + octave_value arg (void) const; + octave_value asin (void) const; + octave_value asinh (void) const; + octave_value atan (void) const; + octave_value atanh (void) const; + octave_value ceil (void) const; + octave_value conj (void) const; + octave_value cos (void) const; + octave_value cosh (void) const; + octave_value exp (void) const; + octave_value expm1 (void) const; + octave_value fix (void) const; + octave_value floor (void) const; + octave_value imag (void) const; + octave_value log (void) const; + octave_value log2 (void) const; + octave_value log10 (void) const; + octave_value log1p (void) const; + octave_value real (void) const; + octave_value round (void) const; + octave_value roundb (void) const; + octave_value signum (void) const; + octave_value sin (void) const; + octave_value sinh (void) const; + octave_value sqrt (void) const; + octave_value tan (void) const; + octave_value tanh (void) const; + 
octave_value finite (void) const; + octave_value isinf (void) const; + octave_value isna (void) const; + octave_value isnan (void) const; + +private: + + DECLARE_OCTAVE_ALLOCATOR + + DECLARE_OV_TYPEID_FUNCTIONS_AND_DATA +}; + +typedef octave_float_complex octave_float_complex_scalar; + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/ov-flt-cx-mat.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ov-flt-cx-mat.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,856 @@ +/* + +Copyright (C) 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include "data-conv.h" +#include "lo-ieee.h" +#include "lo-specfun.h" +#include "lo-mappers.h" +#include "mx-base.h" +#include "mach-info.h" + +#include "gripes.h" +#include "oct-obj.h" +#include "oct-stream.h" +#include "ops.h" +#include "ov-base.h" +#include "ov-base-mat.h" +#include "ov-base-mat.cc" +#include "ov-complex.h" +#include "ov-flt-complex.h" +#include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" +#include "ov-re-mat.h" +#include "ov-flt-re-mat.h" +#include "ov-scalar.h" +#include "ov-float.h" +#include "pr-output.h" +#include "ops.h" + +#include "byte-swap.h" +#include "ls-oct-ascii.h" +#include "ls-hdf5.h" +#include "ls-utils.h" + +template class octave_base_matrix; + +DEFINE_OCTAVE_ALLOCATOR (octave_float_complex_matrix); + +DEFINE_OV_TYPEID_FUNCTIONS_AND_DATA (octave_float_complex_matrix, + "float complex matrix", "single"); + +octave_base_value * +octave_float_complex_matrix::try_narrowing_conversion (void) +{ + octave_base_value *retval = 0; + + if (matrix.ndims () == 2) + { + FloatComplexMatrix cm = matrix.matrix_value (); + + octave_idx_type nr = cm.rows (); + octave_idx_type nc = cm.cols (); + + if (nr == 1 && nc == 1) + { + FloatComplex c = matrix (0, 0); + + float im = std::imag (c); + + if (im == 0.0 && ! 
lo_ieee_signbit (im)) + retval = new octave_float_scalar (std::real (c)); + else + retval = new octave_float_complex (c); + } + else if (nr == 0 || nc == 0) + retval = new octave_float_matrix (FloatMatrix (nr, nc)); + else if (cm.all_elements_are_real ()) + retval = new octave_float_matrix (::real (cm)); + } + else if (matrix.all_elements_are_real ()) + retval = new octave_float_matrix (::real (matrix)); + + return retval; +} + +void +octave_float_complex_matrix::assign (const octave_value_list& idx, + const FloatComplexNDArray& rhs) +{ + octave_base_matrix::assign (idx, rhs); +} + +void +octave_float_complex_matrix::assign (const octave_value_list& idx, + const FloatNDArray& rhs) +{ + octave_idx_type len = idx.length (); + + for (octave_idx_type i = 0; i < len; i++) + matrix.set_index (idx(i).index_vector ()); + + ::assign (matrix, rhs); +} + +bool +octave_float_complex_matrix::valid_as_scalar_index (void) const +{ + // FIXME + return false; +} + +double +octave_float_complex_matrix::double_value (bool force_conversion) const +{ + double retval = lo_ieee_nan_value (); + + if (! force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex matrix", "real scalar"); + + if (rows () > 0 && columns () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "complex matrix", "real scalar"); + + retval = std::real (matrix (0, 0)); + } + else + gripe_invalid_conversion ("complex matrix", "real scalar"); + + return retval; +} + +float +octave_float_complex_matrix::float_value (bool force_conversion) const +{ + float retval = lo_ieee_float_nan_value (); + + if (! 
force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex matrix", "real scalar"); + + if (rows () > 0 && columns () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "complex matrix", "real scalar"); + + retval = std::real (matrix (0, 0)); + } + else + gripe_invalid_conversion ("complex matrix", "real scalar"); + + return retval; +} + +Matrix +octave_float_complex_matrix::matrix_value (bool force_conversion) const +{ + Matrix retval; + + if (! force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex matrix", "real matrix"); + + retval = ::real (matrix.matrix_value ()); + + return retval; +} + +FloatMatrix +octave_float_complex_matrix::float_matrix_value (bool force_conversion) const +{ + FloatMatrix retval; + + if (! force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex matrix", "real matrix"); + + retval = ::real (matrix.matrix_value ()); + + return retval; +} + +Complex +octave_float_complex_matrix::complex_value (bool) const +{ + double tmp = lo_ieee_nan_value (); + + Complex retval (tmp, tmp); + + if (rows () > 0 && columns () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "complex matrix", "complex scalar"); + + retval = matrix (0, 0); + } + else + gripe_invalid_conversion ("complex matrix", "complex scalar"); + + return retval; +} + +FloatComplex +octave_float_complex_matrix::float_complex_value (bool) const +{ + float tmp = lo_ieee_float_nan_value (); + + FloatComplex retval (tmp, tmp); + + if (rows () > 0 && columns () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "complex matrix", "complex scalar"); + + retval = matrix (0, 0); + } + else + gripe_invalid_conversion ("complex matrix", "complex scalar"); + + return retval; +} + +ComplexMatrix +octave_float_complex_matrix::complex_matrix_value (bool) const +{ + return matrix.matrix_value (); +} + +FloatComplexMatrix +octave_float_complex_matrix::float_complex_matrix_value (bool) 
const +{ + return FloatComplexMatrix (matrix.matrix_value ()); +} + +charNDArray +octave_float_complex_matrix::char_array_value (bool frc_str_conv) const +{ + charNDArray retval; + + if (! frc_str_conv) + gripe_implicit_conversion ("Octave:num-to-str", + "complex matrix", "string"); + else + { + retval = charNDArray (dims ()); + octave_idx_type nel = numel (); + + for (octave_idx_type i = 0; i < nel; i++) + retval.elem (i) = static_cast(std::real (matrix.elem (i))); + } + + return retval; +} + +FloatComplexNDArray +octave_float_complex_matrix::float_complex_array_value (bool) const +{ + return FloatComplexNDArray (matrix); +} + +SparseMatrix +octave_float_complex_matrix::sparse_matrix_value (bool force_conversion) const +{ + SparseMatrix retval; + + if (! force_conversion) + gripe_implicit_conversion ("Octave:imag-to-real", + "complex matrix", "real matrix"); + + retval = SparseMatrix (::real (matrix.matrix_value ())); + + return retval; +} + +SparseComplexMatrix +octave_float_complex_matrix::sparse_complex_matrix_value (bool) const +{ + return SparseComplexMatrix (matrix.matrix_value ()); +} + +bool +octave_float_complex_matrix::save_ascii (std::ostream& os) +{ + dim_vector d = dims (); + if (d.length () > 2) + { + FloatComplexNDArray tmp = complex_array_value (); + + os << "# ndims: " << d.length () << "\n"; + + for (int i = 0; i < d.length (); i++) + os << " " << d (i); + + os << "\n" << tmp; + } + else + { + // Keep this case, rather than use generic code above for backward + // compatiability. Makes load_ascii much more complex!! 
+ os << "# rows: " << rows () << "\n" + << "# columns: " << columns () << "\n"; + + os << complex_matrix_value (); + } + + return true; +} + +bool +octave_float_complex_matrix::load_ascii (std::istream& is) +{ + bool success = true; + + string_vector keywords(2); + + keywords[0] = "ndims"; + keywords[1] = "rows"; + + std::string kw; + octave_idx_type val = 0; + + if (extract_keyword (is, keywords, kw, val, true)) + { + if (kw == "ndims") + { + int mdims = static_cast (val); + + if (mdims >= 0) + { + dim_vector dv; + dv.resize (mdims); + + for (int i = 0; i < mdims; i++) + is >> dv(i); + + if (is) + { + FloatComplexNDArray tmp(dv); + + if (tmp.is_empty ()) + matrix = tmp; + else + { + is >> tmp; + + if (is) + matrix = tmp; + else + { + error ("load: failed to load matrix constant"); + success = false; + } + } + } + else + { + error ("load: failed to read dimensions"); + success = false; + } + } + else + { + error ("load: failed to extract number of dimensions"); + success = false; + } + } + else if (kw == "rows") + { + octave_idx_type nr = val; + octave_idx_type nc = 0; + + if (nr >= 0 && extract_keyword (is, "columns", nc) && nc >= 0) + { + if (nr > 0 && nc > 0) + { + FloatComplexMatrix tmp (nr, nc); + is >> tmp; + if (is) + matrix = tmp; + else + { + error ("load: failed to load matrix constant"); + success = false; + } + } + else if (nr == 0 || nc == 0) + matrix = FloatComplexMatrix (nr, nc); + else + panic_impossible (); + } + else + { + error ("load: failed to extract number of rows and columns"); + success = false; + } + } + else + panic_impossible (); + } + else + { + error ("load: failed to extract number of rows and columns"); + success = false; + } + + return success; +} + +bool +octave_float_complex_matrix::save_binary (std::ostream& os, bool&) +{ + dim_vector d = dims (); + if (d.length() < 1) + return false; + + // Use negative value for ndims to differentiate with old format!! 
+ int32_t tmp = - d.length(); + os.write (reinterpret_cast (&tmp), 4); + for (int i = 0; i < d.length (); i++) + { + tmp = d(i); + os.write (reinterpret_cast (&tmp), 4); + } + + FloatComplexNDArray m = complex_array_value (); + save_type st = LS_FLOAT; + if (d.numel () > 4096) // FIXME -- make this configurable. + { + float max_val, min_val; + if (m.all_integers (max_val, min_val)) + st = get_save_type (max_val, min_val); + } + + const FloatComplex *mtmp = m.data (); + write_floats (os, reinterpret_cast (mtmp), st, 2 * d.numel ()); + + return true; +} + +bool +octave_float_complex_matrix::load_binary (std::istream& is, bool swap, + oct_mach_info::float_format fmt) +{ + char tmp; + int32_t mdims; + if (! is.read (reinterpret_cast (&mdims), 4)) + return false; + if (swap) + swap_bytes<4> (&mdims); + if (mdims < 0) + { + mdims = - mdims; + int32_t di; + dim_vector dv; + dv.resize (mdims); + + for (int i = 0; i < mdims; i++) + { + if (! is.read (reinterpret_cast (&di), 4)) + return false; + if (swap) + swap_bytes<4> (&di); + dv(i) = di; + } + + // Convert an array with a single dimension to be a row vector. + // Octave should never write files like this, other software + // might. + + if (mdims == 1) + { + mdims = 2; + dv.resize (mdims); + dv(1) = dv(0); + dv(0) = 1; + } + + if (! is.read (reinterpret_cast (&tmp), 1)) + return false; + + FloatComplexNDArray m(dv); + FloatComplex *im = m.fortran_vec (); + read_floats (is, reinterpret_cast (im), + static_cast (tmp), 2 * dv.numel (), swap, fmt); + if (error_state || ! is) + return false; + matrix = m; + } + else + { + int32_t nr, nc; + nr = mdims; + if (! is.read (reinterpret_cast (&nc), 4)) + return false; + if (swap) + swap_bytes<4> (&nc); + if (! is.read (reinterpret_cast (&tmp), 1)) + return false; + FloatComplexMatrix m (nr, nc); + FloatComplex *im = m.fortran_vec (); + octave_idx_type len = nr * nc; + read_floats (is, reinterpret_cast (im), + static_cast (tmp), 2*len, swap, fmt); + if (error_state || ! 
is) + return false; + matrix = m; + } + return true; +} + +#if defined (HAVE_HDF5) + +bool +octave_float_complex_matrix::save_hdf5 (hid_t loc_id, const char *name, bool) +{ + dim_vector dv = dims (); + int empty = save_hdf5_empty (loc_id, name, dv); + if (empty) + return (empty > 0); + + int rank = dv.length (); + hid_t space_hid = -1, data_hid = -1, type_hid = -1; + bool retval = true; + FloatComplexNDArray m = complex_array_value (); + + OCTAVE_LOCAL_BUFFER (hsize_t, hdims, rank); + + // Octave uses column-major, while HDF5 uses row-major ordering + for (int i = 0; i < rank; i++) + hdims[i] = dv (rank-i-1); + + space_hid = H5Screate_simple (rank, hdims, 0); + if (space_hid < 0) return false; + + hid_t save_type_hid = H5T_NATIVE_FLOAT; + +#if HAVE_HDF5_INT2FLOAT_CONVERSIONS + // hdf5 currently doesn't support float/integer conversions + else + { + float max_val, min_val; + + if (m.all_integers (max_val, min_val)) + save_type_hid + = save_type_to_hdf5 (get_save_type (max_val, min_val)); + } +#endif /* HAVE_HDF5_INT2FLOAT_CONVERSIONS */ + + type_hid = hdf5_make_complex_type (save_type_hid); + if (type_hid < 0) + { + H5Sclose (space_hid); + return false; + } + + data_hid = H5Dcreate (loc_id, name, type_hid, space_hid, H5P_DEFAULT); + if (data_hid < 0) + { + H5Sclose (space_hid); + H5Tclose (type_hid); + return false; + } + + hid_t complex_type_hid = hdf5_make_complex_type (H5T_NATIVE_FLOAT); + if (complex_type_hid < 0) retval = false; + + if (retval) + { + FloatComplex *mtmp = m.fortran_vec (); + if (H5Dwrite (data_hid, complex_type_hid, H5S_ALL, H5S_ALL, H5P_DEFAULT, + mtmp) < 0) + { + H5Tclose (complex_type_hid); + retval = false; + } + } + + H5Tclose (complex_type_hid); + H5Dclose (data_hid); + H5Tclose (type_hid); + H5Sclose (space_hid); + + return retval; +} + +bool +octave_float_complex_matrix::load_hdf5 (hid_t loc_id, const char *name, + bool /* have_h5giterate_bug */) +{ + bool retval = false; + + dim_vector dv; + int empty = load_hdf5_empty (loc_id, name, 
dv); + if (empty > 0) + matrix.resize(dv); + if (empty) + return (empty > 0); + + hid_t data_hid = H5Dopen (loc_id, name); + hid_t type_hid = H5Dget_type (data_hid); + + hid_t complex_type = hdf5_make_complex_type (H5T_NATIVE_FLOAT); + + if (! hdf5_types_compatible (type_hid, complex_type)) + { + H5Tclose (complex_type); + H5Dclose (data_hid); + return false; + } + + hid_t space_id = H5Dget_space (data_hid); + + hsize_t rank = H5Sget_simple_extent_ndims (space_id); + + if (rank < 1) + { + H5Tclose (complex_type); + H5Sclose (space_id); + H5Dclose (data_hid); + return false; + } + + OCTAVE_LOCAL_BUFFER (hsize_t, hdims, rank); + OCTAVE_LOCAL_BUFFER (hsize_t, maxdims, rank); + + H5Sget_simple_extent_dims (space_id, hdims, maxdims); + + // Octave uses column-major, while HDF5 uses row-major ordering + if (rank == 1) + { + dv.resize (2); + dv(0) = 1; + dv(1) = hdims[0]; + } + else + { + dv.resize (rank); + for (hsize_t i = 0, j = rank - 1; i < rank; i++, j--) + dv(j) = hdims[i]; + } + + FloatComplexNDArray m (dv); + FloatComplex *reim = m.fortran_vec (); + if (H5Dread (data_hid, complex_type, H5S_ALL, H5S_ALL, H5P_DEFAULT, + reim) >= 0) + { + retval = true; + matrix = m; + } + + H5Tclose (complex_type); + H5Sclose (space_id); + H5Dclose (data_hid); + + return retval; +} + +#endif + +void +octave_float_complex_matrix::print_raw (std::ostream& os, + bool pr_as_read_syntax) const +{ + octave_print_internal (os, matrix, pr_as_read_syntax, + current_print_indent_level ()); +} + +mxArray * +octave_float_complex_matrix::as_mxArray (void) const +{ + mxArray *retval = new mxArray (mxSINGLE_CLASS, dims (), mxCOMPLEX); + + float *pr = static_cast (retval->get_data ()); + float *pi = static_cast (retval->get_imag_data ()); + + mwSize nel = numel (); + + const FloatComplex *p = matrix.data (); + + for (mwIndex i = 0; i < nel; i++) + { + pr[i] = std::real (p[i]); + pi[i] = std::imag (p[i]); + } + + return retval; +} + +static float +xabs (const FloatComplex& x) +{ + return (xisinf 
(x.real ()) || xisinf (x.imag ())) ? octave_Inf : abs (x); +} + +static float +ximag (const FloatComplex& x) +{ + return x.imag (); +} + +static float +xreal (const FloatComplex& x) +{ + return x.real (); +} + +static bool +any_element_less_than (const FloatNDArray& a, float val) +{ + octave_idx_type len = a.length (); + const float *m = a.fortran_vec (); + + for (octave_idx_type i = 0; i < len; i++) + { + OCTAVE_QUIT; + + if (m[i] < val) + return true; + } + + return false; +} + +static bool +any_element_greater_than (const FloatNDArray& a, float val) +{ + octave_idx_type len = a.length (); + const float *m = a.fortran_vec (); + + for (octave_idx_type i = 0; i < len; i++) + { + OCTAVE_QUIT; + + if (m[i] > val) + return true; + } + + return false; +} + +#define ARRAY_MAPPER(MAP, AMAP, FCN) \ + octave_value \ + octave_float_complex_matrix::MAP (void) const \ + { \ + static AMAP cmap = FCN; \ + return matrix.map (cmap); \ + } + +#define DARRAY_MAPPER(MAP, AMAP, FCN) \ + octave_value \ + octave_float_complex_matrix::MAP (void) const \ + { \ + static FloatComplexNDArray::dmapper dmap = ximag; \ + NDArray m = matrix.map (dmap); \ + if (m.all_elements_are_zero ()) \ + { \ + dmap = xreal; \ + m = matrix.map (dmap); \ + static AMAP cmap = FCN; \ + return m.map (cmap); \ + } \ + else \ + { \ + error ("%s: not defined for complex arguments", #MAP); \ + return octave_value (); \ + } \ + } + +#define CD_ARRAY_MAPPER(MAP, RFCN, CFCN, L1, L2) \ + octave_value \ + octave_float_complex_matrix::MAP (void) const \ + { \ + static FloatComplexNDArray::dmapper idmap = ximag; \ + NDArray m = matrix.map (idmap); \ + if (m.all_elements_are_zero ()) \ + { \ + static FloatComplexNDArray::dmapper rdmap = xreal; \ + m = matrix.map (rdmap); \ + static NDArray::dmapper dmap = RFCN; \ + static NDArray::cmapper cmap = CFCN; \ + return (any_element_less_than (m, L1) \ + ? octave_value (m.map (cmap)) \ + : (any_element_greater_than (m, L2) \ + ? 
octave_value (m.map (cmap)) \ + : octave_value (m.map (dmap)))); \ + } \ + else \ + { \ + /*error ("%s: not defined for complex arguments", #MAP); */ \ + return octave_value (m); \ + } \ + } + +DARRAY_MAPPER (erf, NDArray::dmapper, ::erf) +DARRAY_MAPPER (erfc, NDArray::dmapper, ::erfc) +DARRAY_MAPPER (gamma, NDArray::dmapper, xgamma) +CD_ARRAY_MAPPER (lgamma, xlgamma, xlgamma, 0.0, octave_Inf) + +ARRAY_MAPPER (abs, FloatComplexNDArray::dmapper, xabs) +ARRAY_MAPPER (acos, FloatComplexNDArray::cmapper, ::acos) +ARRAY_MAPPER (acosh, FloatComplexNDArray::cmapper, ::acosh) +ARRAY_MAPPER (angle, FloatComplexNDArray::dmapper, std::arg) +ARRAY_MAPPER (arg, FloatComplexNDArray::dmapper, std::arg) +ARRAY_MAPPER (asin, FloatComplexNDArray::cmapper, ::asin) +ARRAY_MAPPER (asinh, FloatComplexNDArray::cmapper, ::asinh) +ARRAY_MAPPER (atan, FloatComplexNDArray::cmapper, ::atan) +ARRAY_MAPPER (atanh, FloatComplexNDArray::cmapper, ::atanh) +ARRAY_MAPPER (ceil, FloatComplexNDArray::cmapper, ::ceil) +ARRAY_MAPPER (conj, FloatComplexNDArray::cmapper, std::conj) +ARRAY_MAPPER (cos, FloatComplexNDArray::cmapper, std::cos) +ARRAY_MAPPER (cosh, FloatComplexNDArray::cmapper, std::cosh) +ARRAY_MAPPER (exp, FloatComplexNDArray::cmapper, std::exp) +ARRAY_MAPPER (expm1, FloatComplexNDArray::cmapper, ::expm1f) +ARRAY_MAPPER (fix, FloatComplexNDArray::cmapper, ::fix) +ARRAY_MAPPER (floor, FloatComplexNDArray::cmapper, ::floor) +ARRAY_MAPPER (imag, FloatComplexNDArray::dmapper, ximag) +ARRAY_MAPPER (log, FloatComplexNDArray::cmapper, std::log) +ARRAY_MAPPER (log2, FloatComplexNDArray::cmapper, xlog2) +ARRAY_MAPPER (log10, FloatComplexNDArray::cmapper, std::log10) +ARRAY_MAPPER (log1p, FloatComplexNDArray::cmapper, ::log1pf) +ARRAY_MAPPER (real, FloatComplexNDArray::dmapper, xreal) +ARRAY_MAPPER (round, FloatComplexNDArray::cmapper, xround) +ARRAY_MAPPER (roundb, FloatComplexNDArray::cmapper, xroundb) +ARRAY_MAPPER (signum, FloatComplexNDArray::cmapper, ::signum) +ARRAY_MAPPER (sin, 
FloatComplexNDArray::cmapper, std::sin) +ARRAY_MAPPER (sinh, FloatComplexNDArray::cmapper, std::sinh) +ARRAY_MAPPER (sqrt, FloatComplexNDArray::cmapper, std::sqrt) +ARRAY_MAPPER (tan, FloatComplexNDArray::cmapper, std::tan) +ARRAY_MAPPER (tanh, FloatComplexNDArray::cmapper, std::tanh) +ARRAY_MAPPER (finite, FloatComplexNDArray::bmapper, xfinite) +ARRAY_MAPPER (isinf, FloatComplexNDArray::bmapper, xisinf) +ARRAY_MAPPER (isna, FloatComplexNDArray::bmapper, octave_is_NA) +ARRAY_MAPPER (isnan, FloatComplexNDArray::bmapper, xisnan) + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/ov-flt-cx-mat.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ov-flt-cx-mat.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,222 @@ +/* + +Copyright (C) 1996, 1997, 1998, 2000, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_float_complex_matrix_h) +#define octave_float_complex_matrix_h 1 + +#include + +#include +#include + +#include "mx-base.h" +#include "oct-alloc.h" +#include "str-vec.h" + +#include "error.h" +#include "oct-stream.h" +#include "ov-base.h" +#include "ov-base-mat.h" +#include "ov-typeinfo.h" + +#include "MatrixType.h" + +class Octave_map; +class octave_value_list; + +class tree_walker; + +// Complex matrix values. 
+ +class +OCTINTERP_API +octave_float_complex_matrix : public octave_base_matrix +{ +public: + + octave_float_complex_matrix (void) + : octave_base_matrix () { } + + octave_float_complex_matrix (const FloatComplexNDArray& m) + : octave_base_matrix (m) { } + + octave_float_complex_matrix (const FloatComplexMatrix& m) + : octave_base_matrix (m) { } + + octave_float_complex_matrix (const FloatComplexMatrix& m, const MatrixType& t) + : octave_base_matrix (m, t) { } + + octave_float_complex_matrix (const ArrayN& m) + : octave_base_matrix (FloatComplexNDArray (m)) { } + + octave_float_complex_matrix (const FloatComplexDiagMatrix& d) + : octave_base_matrix (FloatComplexMatrix (d)) { } + + octave_float_complex_matrix (const FloatComplexRowVector& v) + : octave_base_matrix (FloatComplexMatrix (v)) { } + + octave_float_complex_matrix (const FloatComplexColumnVector& v) + : octave_base_matrix (FloatComplexMatrix (v)) { } + + octave_float_complex_matrix (const octave_float_complex_matrix& cm) + : octave_base_matrix (cm) { } + + ~octave_float_complex_matrix (void) { } + + octave_base_value *clone (void) const { return new octave_float_complex_matrix (*this); } + octave_base_value *empty_clone (void) const { return new octave_float_complex_matrix (); } + + octave_base_value *try_narrowing_conversion (void); + + void assign (const octave_value_list& idx, const FloatComplexNDArray& rhs); + + void assign (const octave_value_list& idx, const FloatNDArray& rhs); + + bool is_complex_matrix (void) const { return true; } + + bool is_complex_type (void) const { return true; } + + bool is_single_type (void) const { return true; } + + bool is_float_type (void) const { return true; } + + bool valid_as_scalar_index (void) const; + + double double_value (bool = false) const; + + float float_value (bool = false) const; + + double scalar_value (bool frc_str_conv = false) const + { return double_value (frc_str_conv); } + + float float_scalar_value (bool frc_str_conv = false) const + { return
float_value (frc_str_conv); } + + Matrix matrix_value (bool = false) const; + + FloatMatrix float_matrix_value (bool = false) const; + + Complex complex_value (bool = false) const; + + FloatComplex float_complex_value (bool = false) const; + + ComplexMatrix complex_matrix_value (bool = false) const; + + FloatComplexMatrix float_complex_matrix_value (bool = false) const; + + ComplexNDArray complex_array_value (bool = false) const { return matrix; } + + FloatComplexNDArray float_complex_array_value (bool = false) const; + + charNDArray char_array_value (bool frc_str_conv = false) const; + + SparseMatrix sparse_matrix_value (bool = false) const; + + SparseComplexMatrix sparse_complex_matrix_value (bool = false) const; + + void increment (void) { matrix += FloatComplex (1.0); } + + void decrement (void) { matrix -= FloatComplex (1.0); } + + bool save_ascii (std::ostream& os); + + bool load_ascii (std::istream& is); + + bool save_binary (std::ostream& os, bool& save_as_floats); + + bool load_binary (std::istream& is, bool swap, + oct_mach_info::float_format fmt); + +#if defined (HAVE_HDF5) + bool save_hdf5 (hid_t loc_id, const char *name, bool save_as_floats); + + bool load_hdf5 (hid_t loc_id, const char *name, bool have_h5giterate_bug); +#endif + + int write (octave_stream& os, int block_size, + oct_data_conv::data_type output_type, int skip, + oct_mach_info::float_format flt_fmt) const + { + // Yes, for compatibility, we drop the imaginary part here. 
+ return os.write (matrix_value (true), block_size, output_type, + skip, flt_fmt); + } + + void print_raw (std::ostream& os, bool pr_as_read_syntax = false) const; + + mxArray *as_mxArray (void) const; + + octave_value erf (void) const; + octave_value erfc (void) const; + octave_value gamma (void) const; + octave_value lgamma (void) const; + octave_value abs (void) const; + octave_value acos (void) const; + octave_value acosh (void) const; + octave_value angle (void) const; + octave_value arg (void) const; + octave_value asin (void) const; + octave_value asinh (void) const; + octave_value atan (void) const; + octave_value atanh (void) const; + octave_value ceil (void) const; + octave_value conj (void) const; + octave_value cos (void) const; + octave_value cosh (void) const; + octave_value exp (void) const; + octave_value expm1 (void) const; + octave_value fix (void) const; + octave_value floor (void) const; + octave_value imag (void) const; + octave_value log (void) const; + octave_value log2 (void) const; + octave_value log10 (void) const; + octave_value log1p (void) const; + octave_value real (void) const; + octave_value round (void) const; + octave_value roundb (void) const; + octave_value signum (void) const; + octave_value sin (void) const; + octave_value sinh (void) const; + octave_value sqrt (void) const; + octave_value tan (void) const; + octave_value tanh (void) const; + octave_value finite (void) const; + octave_value isinf (void) const; + octave_value isna (void) const; + octave_value isnan (void) const; + +private: + + DECLARE_OCTAVE_ALLOCATOR + + DECLARE_OV_TYPEID_FUNCTIONS_AND_DATA +}; + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/ov-flt-re-mat.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ov-flt-re-mat.cc Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,837 @@ +/* + +Copyright (C) 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007 John W. 
Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include +#include + +#include "data-conv.h" +#include "lo-ieee.h" +#include "lo-utils.h" +#include "lo-specfun.h" +#include "lo-mappers.h" +#include "mach-info.h" +#include "mx-base.h" +#include "quit.h" + +#include "defun.h" +#include "gripes.h" +#include "oct-obj.h" +#include "oct-lvalue.h" +#include "oct-stream.h" +#include "ops.h" +#include "ov-base.h" +#include "ov-base-mat.h" +#include "ov-base-mat.cc" +#include "ov-scalar.h" +#include "ov-float.h" +#include "ov-flt-complex.h" +#include "ov-re-mat.h" +#include "ov-flt-re-mat.h" +#include "ov-flt-cx-mat.h" +#include "ov-re-sparse.h" +#include "ov-type-conv.h" +#include "pr-output.h" +#include "variables.h" +#include "ops.h" + +#include "byte-swap.h" +#include "ls-oct-ascii.h" +#include "ls-utils.h" +#include "ls-hdf5.h" + +#if ! 
defined (UCHAR_MAX) +#define UCHAR_MAX 255 +#endif + +template class octave_base_matrix; + +DEFINE_OCTAVE_ALLOCATOR (octave_float_matrix); + +DEFINE_OV_TYPEID_FUNCTIONS_AND_DATA (octave_float_matrix, "float matrix", "single"); + +octave_base_value * +octave_float_matrix::try_narrowing_conversion (void) +{ + octave_base_value *retval = 0; + + if (matrix.nelem () == 1) + retval = new octave_float_scalar (matrix (0)); + + return retval; +} + +bool +octave_float_matrix::valid_as_scalar_index (void) const +{ + // FIXME + return false; +} + +double +octave_float_matrix::double_value (bool) const +{ + double retval = lo_ieee_nan_value (); + + if (numel () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "real matrix", "real scalar"); + + retval = matrix (0, 0); + } + else + gripe_invalid_conversion ("real matrix", "real scalar"); + + return retval; +} + +float +octave_float_matrix::float_value (bool) const +{ + float retval = lo_ieee_float_nan_value (); + + if (numel () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "real matrix", "real scalar"); + + retval = matrix (0, 0); + } + else + gripe_invalid_conversion ("real matrix", "real scalar"); + + return retval; +} + +// FIXME + +Matrix +octave_float_matrix::matrix_value (bool) const +{ + return Matrix (matrix.matrix_value ()); +} + +FloatMatrix +octave_float_matrix::float_matrix_value (bool) const +{ + return matrix.matrix_value (); +} + +Complex +octave_float_matrix::complex_value (bool) const +{ + double tmp = lo_ieee_nan_value (); + + Complex retval (tmp, tmp); + + if (rows () > 0 && columns () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "real matrix", "complex scalar"); + + retval = matrix (0, 0); + } + else + gripe_invalid_conversion ("real matrix", "complex scalar"); + + return retval; +} + +FloatComplex +octave_float_matrix::float_complex_value (bool) const +{ + double tmp = lo_ieee_float_nan_value (); + + FloatComplex retval (tmp, tmp); + + if (rows () > 0 
&& columns () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "real matrix", "complex scalar"); + + retval = matrix (0, 0); + } + else + gripe_invalid_conversion ("real matrix", "complex scalar"); + + return retval; +} + +// FIXME + +ComplexMatrix +octave_float_matrix::complex_matrix_value (bool) const +{ + return ComplexMatrix (matrix.matrix_value ()); +} + +FloatComplexMatrix +octave_float_matrix::float_complex_matrix_value (bool) const +{ + return FloatComplexMatrix (matrix.matrix_value ()); +} + +ComplexNDArray +octave_float_matrix::complex_array_value (bool) const +{ + return ComplexNDArray (matrix); +} + +FloatComplexNDArray +octave_float_matrix::float_complex_array_value (bool) const +{ + return FloatComplexNDArray (matrix); +} + +NDArray +octave_float_matrix::array_value (bool) const +{ + return NDArray (matrix); +} + +boolNDArray +octave_float_matrix::bool_array_value (bool warn) const +{ + if (warn && matrix.any_element_not_one_or_zero ()) + gripe_logical_conversion (); + + return boolNDArray (matrix); +} + +charNDArray +octave_float_matrix::char_array_value (bool) const +{ + charNDArray retval (dims ()); + + octave_idx_type nel = numel (); + + for (octave_idx_type i = 0; i < nel; i++) + retval.elem (i) = static_cast(matrix.elem (i)); + + return retval; +} + +SparseMatrix +octave_float_matrix::sparse_matrix_value (bool) const +{ + return SparseMatrix (matrix.matrix_value ()); +} + +SparseComplexMatrix +octave_float_matrix::sparse_complex_matrix_value (bool) const +{ + // FIXME Need a SparseComplexMatrix (Matrix) constructor to make + // this function more efficient. 
Then this should become + // return SparseComplexMatrix (matrix.matrix_value ()); + return SparseComplexMatrix (sparse_matrix_value ()); +} + +streamoff_array +octave_float_matrix::streamoff_array_value (void) const +{ + streamoff_array retval (dims ()); + + octave_idx_type nel = numel (); + + for (octave_idx_type i = 0; i < nel; i++) + { + float d = matrix(i); + + if (F_NINT (d) == d) + retval(i) = std::streamoff (static_cast (d)); + else + { + error ("conversion to streamoff_array value failed"); + break; + } + } + + return retval; +} + +octave_value +octave_float_matrix::convert_to_str_internal (bool, bool, char type) const +{ + octave_value retval; + dim_vector dv = dims (); + octave_idx_type nel = dv.numel (); + + charNDArray chm (dv); + + bool warned = false; + + for (octave_idx_type i = 0; i < nel; i++) + { + OCTAVE_QUIT; + + float d = matrix (i); + + if (xisnan (d)) + { + ::error ("invalid conversion from NaN to character"); + return retval; + } + else + { + int ival = NINT (d); + + if (ival < 0 || ival > UCHAR_MAX) + { + // FIXME -- is there something + // better we could do? + + ival = 0; + + if (! warned) + { + ::warning ("range error for conversion to character value"); + warned = true; + } + } + + chm (i) = static_cast (ival); + } + } + + retval = octave_value (chm, true, type); + + return retval; +} + +bool +octave_float_matrix::save_ascii (std::ostream& os) +{ + dim_vector d = dims (); + + if (d.length () > 2) + { + FloatNDArray tmp = float_array_value (); + + os << "# ndims: " << d.length () << "\n"; + + for (int i=0; i < d.length (); i++) + os << " " << d (i); + + os << "\n" << tmp; + } + else + { + // Keep this case, rather than use generic code above for backward + compatibility. Makes load_ascii much more complex!!
+ os << "# rows: " << rows () << "\n" + << "# columns: " << columns () << "\n"; + + os << float_matrix_value (); + } + + return true; +} + +bool +octave_float_matrix::load_ascii (std::istream& is) +{ + bool success = true; + + string_vector keywords(2); + + keywords[0] = "ndims"; + keywords[1] = "rows"; + + std::string kw; + octave_idx_type val = 0; + + if (extract_keyword (is, keywords, kw, val, true)) + { + if (kw == "ndims") + { + int mdims = static_cast (val); + + if (mdims >= 0) + { + dim_vector dv; + dv.resize (mdims); + + for (int i = 0; i < mdims; i++) + is >> dv(i); + + if (is) + { + FloatNDArray tmp(dv); + + if (tmp.is_empty ()) + matrix = tmp; + else + { + is >> tmp; + + if (is) + matrix = tmp; + else + { + error ("load: failed to load matrix constant"); + success = false; + } + } + } + else + { + error ("load: failed to read dimensions"); + success = false; + } + } + else + { + error ("load: failed to extract number of dimensions"); + success = false; + } + } + else if (kw == "rows") + { + octave_idx_type nr = val; + octave_idx_type nc = 0; + + if (nr >= 0 && extract_keyword (is, "columns", nc) && nc >= 0) + { + if (nr > 0 && nc > 0) + { + FloatMatrix tmp (nr, nc); + is >> tmp; + if (is) + matrix = tmp; + else + { + error ("load: failed to load matrix constant"); + success = false; + } + } + else if (nr == 0 || nc == 0) + matrix = FloatMatrix (nr, nc); + else + panic_impossible (); + } + else + { + error ("load: failed to extract number of rows and columns"); + success = false; + } + } + else + panic_impossible (); + } + else + { + error ("load: failed to extract number of rows and columns"); + success = false; + } + + return success; +} + +bool +octave_float_matrix::save_binary (std::ostream& os, bool&) +{ + + dim_vector d = dims (); + if (d.length() < 1) + return false; + + // Use negative value for ndims to differentiate with old format!! 
+ int32_t tmp = - d.length(); + os.write (reinterpret_cast (&tmp), 4); + for (int i = 0; i < d.length (); i++) + { + tmp = d(i); + os.write (reinterpret_cast (&tmp), 4); + } + + FloatNDArray m = float_array_value (); + save_type st = LS_FLOAT; + if (d.numel () > 8192) // FIXME -- make this configurable. + { + float max_val, min_val; + if (m.all_integers (max_val, min_val)) + st = get_save_type (max_val, min_val); + } + + const float *mtmp = m.data (); + write_floats (os, mtmp, st, d.numel ()); + + return true; +} + +bool +octave_float_matrix::load_binary (std::istream& is, bool swap, + oct_mach_info::float_format fmt) +{ + char tmp; + int32_t mdims; + if (! is.read (reinterpret_cast (&mdims), 4)) + return false; + if (swap) + swap_bytes<4> (&mdims); + if (mdims < 0) + { + mdims = - mdims; + int32_t di; + dim_vector dv; + dv.resize (mdims); + + for (int i = 0; i < mdims; i++) + { + if (! is.read (reinterpret_cast (&di), 4)) + return false; + if (swap) + swap_bytes<4> (&di); + dv(i) = di; + } + + // Convert an array with a single dimension to be a row vector. + // Octave should never write files like this, other software + // might. + + if (mdims == 1) + { + mdims = 2; + dv.resize (mdims); + dv(1) = dv(0); + dv(0) = 1; + } + + if (! is.read (reinterpret_cast (&tmp), 1)) + return false; + + FloatNDArray m(dv); + float *re = m.fortran_vec (); + read_floats (is, re, static_cast (tmp), dv.numel (), swap, fmt); + if (error_state || ! is) + return false; + matrix = m; + } + else + { + int32_t nr, nc; + nr = mdims; + if (! is.read (reinterpret_cast (&nc), 4)) + return false; + if (swap) + swap_bytes<4> (&nc); + if (! is.read (reinterpret_cast (&tmp), 1)) + return false; + FloatMatrix m (nr, nc); + float *re = m.fortran_vec (); + octave_idx_type len = nr * nc; + read_floats (is, re, static_cast (tmp), len, swap, fmt); + if (error_state || ! 
is) + return false; + matrix = m; + } + return true; +} + +#if defined (HAVE_HDF5) + +bool +octave_float_matrix::save_hdf5 (hid_t loc_id, const char *name, bool) +{ + dim_vector dv = dims (); + int empty = save_hdf5_empty (loc_id, name, dv); + if (empty) + return (empty > 0); + + int rank = dv.length (); + hid_t space_hid = -1, data_hid = -1; + bool retval = true; + FloatNDArray m = array_value (); + + OCTAVE_LOCAL_BUFFER (hsize_t, hdims, rank); + + // Octave uses column-major, while HDF5 uses row-major ordering + for (int i = 0; i < rank; i++) + hdims[i] = dv (rank-i-1); + + space_hid = H5Screate_simple (rank, hdims, 0); + + if (space_hid < 0) return false; + + hid_t save_type_hid = H5T_NATIVE_FLOAT; + +#if HAVE_HDF5_INT2FLOAT_CONVERSIONS + // hdf5 currently doesn't support float/integer conversions + else + { + float max_val, min_val; + + if (m.all_integers (max_val, min_val)) + save_type_hid + = save_type_to_hdf5 (get_save_type (max_val, min_val)); + } +#endif /* HAVE_HDF5_INT2FLOAT_CONVERSIONS */ + + data_hid = H5Dcreate (loc_id, name, save_type_hid, space_hid, + H5P_DEFAULT); + if (data_hid < 0) + { + H5Sclose (space_hid); + return false; + } + + float *mtmp = m.fortran_vec (); + retval = H5Dwrite (data_hid, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, + H5P_DEFAULT, mtmp) >= 0; + + H5Dclose (data_hid); + H5Sclose (space_hid); + + return retval; +} + +bool +octave_float_matrix::load_hdf5 (hid_t loc_id, const char *name, + bool /* have_h5giterate_bug */) +{ + bool retval = false; + + dim_vector dv; + int empty = load_hdf5_empty (loc_id, name, dv); + if (empty > 0) + matrix.resize(dv); + if (empty) + return (empty > 0); + + hid_t data_hid = H5Dopen (loc_id, name); + hid_t space_id = H5Dget_space (data_hid); + + hsize_t rank = H5Sget_simple_extent_ndims (space_id); + + if (rank < 1) + { + H5Sclose (space_id); + H5Dclose (data_hid); + return false; + } + + OCTAVE_LOCAL_BUFFER (hsize_t, hdims, rank); + OCTAVE_LOCAL_BUFFER (hsize_t, maxdims, rank); + + 
H5Sget_simple_extent_dims (space_id, hdims, maxdims); + + // Octave uses column-major, while HDF5 uses row-major ordering + if (rank == 1) + { + dv.resize (2); + dv(0) = 1; + dv(1) = hdims[0]; + } + else + { + dv.resize (rank); + for (hsize_t i = 0, j = rank - 1; i < rank; i++, j--) + dv(j) = hdims[i]; + } + + FloatNDArray m (dv); + float *re = m.fortran_vec (); + if (H5Dread (data_hid, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, + H5P_DEFAULT, re) >= 0) + { + retval = true; + matrix = m; + } + + H5Sclose (space_id); + H5Dclose (data_hid); + + return retval; +} + +#endif + +void +octave_float_matrix::print_raw (std::ostream& os, + bool pr_as_read_syntax) const +{ + octave_print_internal (os, matrix, pr_as_read_syntax, + current_print_indent_level ()); +} + +mxArray * +octave_float_matrix::as_mxArray (void) const +{ + mxArray *retval = new mxArray (mxSINGLE_CLASS, dims (), mxREAL); + + float *pr = static_cast (retval->get_data ()); + + mwSize nel = numel (); + + const float *p = matrix.data (); + + for (mwIndex i = 0; i < nel; i++) + pr[i] = p[i]; + + return retval; +} + +static bool +any_element_less_than (const FloatNDArray& a, float val) +{ + octave_idx_type len = a.length (); + const float *m = a.fortran_vec (); + + for (octave_idx_type i = 0; i < len; i++) + { + OCTAVE_QUIT; + + if (m[i] < val) + return true; + } + + return false; +} + +static bool +any_element_greater_than (const FloatNDArray& a, float val) +{ + octave_idx_type len = a.length (); + const float *m = a.fortran_vec (); + + for (octave_idx_type i = 0; i < len; i++) + { + OCTAVE_QUIT; + + if (m[i] > val) + return true; + } + + return false; +} + +#define ARRAY_MAPPER(MAP, AMAP, FCN) \ + octave_value \ + octave_float_matrix::MAP (void) const \ + { \ + static AMAP dmap = FCN; \ + return matrix.map (dmap); \ + } + +#define CD_ARRAY_MAPPER(MAP, RFCN, CFCN, L1, L2) \ + octave_value \ + octave_float_matrix::MAP (void) const \ + { \ + static FloatNDArray::dmapper dmap = RFCN; \ + static FloatNDArray::cmapper cmap 
= CFCN; \ + \ + return (any_element_less_than (matrix, L1) \ + ? octave_value (matrix.map (cmap)) \ + : (any_element_greater_than (matrix, L2) \ + ? octave_value (matrix.map (cmap)) \ + : octave_value (matrix.map (dmap)))); \ + } + +static float +xconj (float x) +{ + return x; +} + +ARRAY_MAPPER (erf, FloatNDArray::dmapper, ::erff) +ARRAY_MAPPER (erfc, FloatNDArray::dmapper, ::erfcf) +ARRAY_MAPPER (gamma, FloatNDArray::dmapper, xgamma) +CD_ARRAY_MAPPER (lgamma, xlgamma, xlgamma, 0.0, octave_Inf) +ARRAY_MAPPER (abs, FloatNDArray::dmapper, ::fabsf) +ARRAY_MAPPER (acos, FloatNDArray::dmapper, ::acosf) +CD_ARRAY_MAPPER (acosh, ::acoshf, ::acosh, 1.0, octave_Inf) +ARRAY_MAPPER (angle, FloatNDArray::dmapper, ::arg) +ARRAY_MAPPER (arg, FloatNDArray::dmapper, ::arg) +CD_ARRAY_MAPPER (asin, ::asinf, ::asin, -1.0, 1.0) +ARRAY_MAPPER (asinh, FloatNDArray::dmapper,::asinhf) +ARRAY_MAPPER (atan, FloatNDArray::dmapper, ::atanf) +CD_ARRAY_MAPPER (atanh, ::atanhf, ::atanh, -1.0, 1.0) +ARRAY_MAPPER (ceil, FloatNDArray::dmapper, ::ceilf) +ARRAY_MAPPER (conj, FloatNDArray::dmapper, xconj) +ARRAY_MAPPER (cos, FloatNDArray::dmapper, ::cosf) +ARRAY_MAPPER (cosh, FloatNDArray::dmapper, ::coshf) +ARRAY_MAPPER (exp, FloatNDArray::dmapper, ::expf) +ARRAY_MAPPER (expm1, FloatNDArray::dmapper, ::expm1f) +ARRAY_MAPPER (fix, FloatNDArray::dmapper, ::fix) +ARRAY_MAPPER (floor, FloatNDArray::dmapper, ::floorf) +ARRAY_MAPPER (imag, FloatNDArray::dmapper, ::imag) +CD_ARRAY_MAPPER (log, ::logf, std::log, 0.0, octave_Inf) +CD_ARRAY_MAPPER (log2, xlog2, xlog2, 0.0, octave_Inf) +CD_ARRAY_MAPPER (log10, ::log10f, std::log10, 0.0, octave_Inf) +CD_ARRAY_MAPPER (log1p, ::log1pf, ::log1pf, -1.0, octave_Inf) +ARRAY_MAPPER (real, FloatNDArray::dmapper, ::real) +ARRAY_MAPPER (round, FloatNDArray::dmapper, xround) +ARRAY_MAPPER (roundb, FloatNDArray::dmapper, xroundb) +ARRAY_MAPPER (signum, FloatNDArray::dmapper, ::signum) +ARRAY_MAPPER (sin, FloatNDArray::dmapper, ::sinf) +ARRAY_MAPPER (sinh, 
FloatNDArray::dmapper, ::sinhf) +CD_ARRAY_MAPPER (sqrt, ::sqrtf, std::sqrt, 0.0, octave_Inf) +ARRAY_MAPPER (tan, FloatNDArray::dmapper, ::tanf) +ARRAY_MAPPER (tanh, FloatNDArray::dmapper, ::tanhf) +ARRAY_MAPPER (finite, FloatNDArray::bmapper, xfinite) +ARRAY_MAPPER (isinf, FloatNDArray::bmapper, xisinf) +ARRAY_MAPPER (isna, FloatNDArray::bmapper, octave_is_NA) +ARRAY_MAPPER (isnan, FloatNDArray::bmapper, xisnan) + +DEFUN (single, args, , + "-*- texinfo -*-\n\ +@deftypefn {Built-in Function} {} single (@var{x})\n\ +Convert @var{x} to single precision type.\n\ +@end deftypefn") +{ + // The OCTAVE_TYPE_CONV_BODY3 macro declares retval, so they go + // inside their own scopes, and we don't declare retval here to + // avoid a shadowed declaration warning. + + if (args.length () == 1) + { + if (args(0).is_sparse_type ()) + { + error ("single: sparse type do not support single precision"); + } + else if (args(0).is_complex_type ()) + { + OCTAVE_TYPE_CONV_BODY3 (single, octave_float_complex_matrix, octave_float_complex); + } + else + { + OCTAVE_TYPE_CONV_BODY3 (single, octave_float_matrix, octave_float_scalar); + } + } + else + print_usage (); + + return octave_value (); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/ov-flt-re-mat.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ov-flt-re-mat.h Sun Apr 27 22:34:17 2008 +0200 @@ -0,0 +1,250 @@ +/* + +Copyright (C) 1996, 1997, 1998, 2000, 2002, 2003, 2004, 2005, 2006, + 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +#if !defined (octave_float_matrix_h) +#define octave_float_matrix_h 1 + +#include + +#include +#include + +#include "mx-base.h" +#include "oct-alloc.h" +#include "so-array.h" +#include "str-vec.h" + +#include "error.h" +#include "oct-stream.h" +#include "ov-base.h" +#include "ov-base-mat.h" +#include "ov-typeinfo.h" + +#include "MatrixType.h" + +class Octave_map; +class octave_value_list; + +class tree_walker; + +// Real matrix values. + +class +OCTINTERP_API +octave_float_matrix : public octave_base_matrix +{ +public: + + octave_float_matrix (void) + : octave_base_matrix () { } + + octave_float_matrix (const FloatMatrix& m) + : octave_base_matrix (m) { } + + octave_float_matrix (const FloatMatrix& m, const MatrixType& t) + : octave_base_matrix (m, t) { } + + octave_float_matrix (const FloatNDArray& nda) + : octave_base_matrix (nda) { } + + octave_float_matrix (const ArrayN& m) + : octave_base_matrix (FloatNDArray (m)) { } + + octave_float_matrix (const FloatDiagMatrix& d) + : octave_base_matrix (FloatMatrix (d)) { } + + octave_float_matrix (const FloatRowVector& v) + : octave_base_matrix (FloatMatrix (v)) { } + + octave_float_matrix (const FloatColumnVector& v) + : octave_base_matrix (FloatMatrix (v)) { } + + octave_float_matrix (const octave_float_matrix& m) + : octave_base_matrix (m) { } + + ~octave_float_matrix (void) { } + + octave_base_value *clone (void) const { return new octave_float_matrix (*this); } + octave_base_value *empty_clone (void) const { return new octave_float_matrix (); } + + octave_base_value *try_narrowing_conversion (void); + + idx_vector index_vector (void) const { return idx_vector (matrix); } + + bool is_real_matrix (void) const { return true; } + + bool is_real_type (void) const { return true; } + + bool is_single_type (void) const { return true; } + 
+ bool is_float_type (void) const { return true; } + + bool valid_as_scalar_index (void) const; + + int8NDArray + int8_array_value (void) const { return int8NDArray (matrix); } + + int16NDArray + int16_array_value (void) const { return int16NDArray (matrix); } + + int32NDArray + int32_array_value (void) const { return int32NDArray (matrix); } + + int64NDArray + int64_array_value (void) const { return int64NDArray (matrix); } + + uint8NDArray + uint8_array_value (void) const { return uint8NDArray (matrix); } + + uint16NDArray + uint16_array_value (void) const { return uint16NDArray (matrix); } + + uint32NDArray + uint32_array_value (void) const { return uint32NDArray (matrix); } + + uint64NDArray + uint64_array_value (void) const { return uint64NDArray (matrix); } + + double double_value (bool = false) const; + + float float_value (bool = false) const; + + double scalar_value (bool frc_str_conv = false) const + { return double_value (frc_str_conv); } + + float float_scalar_value (bool frc_str_conv = false) const + { return float_value (frc_str_conv); } + + Matrix matrix_value (bool = false) const; + + FloatMatrix float_matrix_value (bool = false) const; + + Complex complex_value (bool = false) const; + + FloatComplex float_complex_value (bool = false) const; + + ComplexMatrix complex_matrix_value (bool = false) const; + + FloatComplexMatrix float_complex_matrix_value (bool = false) const; + + ComplexNDArray complex_array_value (bool = false) const; + + FloatComplexNDArray float_complex_array_value (bool = false) const; + + boolNDArray bool_array_value (bool warn = false) const; + + charNDArray char_array_value (bool = false) const; + + NDArray array_value (bool = false) const; + + FloatNDArray float_array_value (bool = false) const { return matrix; } + + SparseMatrix sparse_matrix_value (bool = false) const; + + SparseComplexMatrix sparse_complex_matrix_value (bool = false) const; + + streamoff_array streamoff_array_value (void) const; + + void increment (void) { 
matrix += 1.0; } + + void decrement (void) { matrix -= 1.0; } + + octave_value convert_to_str_internal (bool pad, bool force, char type) const; + + void print_raw (std::ostream& os, bool pr_as_read_syntax = false) const; + + bool save_ascii (std::ostream& os); + + bool load_ascii (std::istream& is); + + bool save_binary (std::ostream& os, bool& save_as_floats); + + bool load_binary (std::istream& is, bool swap, + oct_mach_info::float_format fmt); + +#if defined (HAVE_HDF5) + bool save_hdf5 (hid_t loc_id, const char *name, bool save_as_floats); + + bool load_hdf5 (hid_t loc_id, const char *name, bool have_h5giterate_bug); +#endif + + int write (octave_stream& os, int block_size, + oct_data_conv::data_type output_type, int skip, + oct_mach_info::float_format flt_fmt) const + { return os.write (matrix, block_size, output_type, skip, flt_fmt); } + + mxArray *as_mxArray (void) const; + + octave_value erf (void) const; + octave_value erfc (void) const; + octave_value gamma (void) const; + octave_value lgamma (void) const; + octave_value abs (void) const; + octave_value acos (void) const; + octave_value acosh (void) const; + octave_value angle (void) const; + octave_value arg (void) const; + octave_value asin (void) const; + octave_value asinh (void) const; + octave_value atan (void) const; + octave_value atanh (void) const; + octave_value ceil (void) const; + octave_value conj (void) const; + octave_value cos (void) const; + octave_value cosh (void) const; + octave_value exp (void) const; + octave_value expm1 (void) const; + octave_value fix (void) const; + octave_value floor (void) const; + octave_value imag (void) const; + octave_value log (void) const; + octave_value log2 (void) const; + octave_value log10 (void) const; + octave_value log1p (void) const; + octave_value real (void) const; + octave_value round (void) const; + octave_value roundb (void) const; + octave_value signum (void) const; + octave_value sin (void) const; + octave_value sinh (void) const; + 
octave_value sqrt (void) const; + octave_value tan (void) const; + octave_value tanh (void) const; + octave_value finite (void) const; + octave_value isinf (void) const; + octave_value isna (void) const; + octave_value isnan (void) const; + +private: + DECLARE_OCTAVE_ALLOCATOR + + DECLARE_OV_TYPEID_FUNCTIONS_AND_DATA +}; + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f5faba05a2 -r 82be108cc558 src/ov-intx.h --- a/src/ov-intx.h Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-intx.h Sun Apr 27 22:34:17 2008 +0200 @@ -111,8 +111,29 @@ } + float + float_value (bool = false) const + { + float retval = lo_ieee_float_nan_value (); + + if (numel () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + type_name (), "real scalar"); + + retval = matrix(0).float_value (); + } + else + gripe_invalid_conversion (type_name (), "real scalar"); + + return retval; + + } + double scalar_value (bool = false) const { return double_value (); } + float float_scalar_value (bool = false) const { return float_value (); } + Matrix matrix_value (bool = false) const { @@ -131,6 +152,24 @@ return retval; } + FloatMatrix + float_matrix_value (bool = false) const + { + FloatMatrix retval; + dim_vector dv = dims (); + if (dv.length () > 2) + error ("invalid conversion of %s to FloatMatrix", type_name().c_str ()); + else + { + retval = FloatMatrix (dv(0), dv(1)); + float *vec = retval.fortran_vec (); + octave_idx_type nel = matrix.numel (); + for (octave_idx_type i = 0; i < nel; i++) + vec[i] = matrix(i).float_value (); + } + return retval; + } + ComplexMatrix complex_matrix_value (bool = false) const { @@ -149,6 +188,24 @@ return retval; } + FloatComplexMatrix + float_complex_matrix_value (bool = false) const + { + FloatComplexMatrix retval; + dim_vector dv = dims(); + if (dv.length () > 2) + error ("invalid conversion of %s to FloatMatrix", type_name().c_str ()); + else + { + retval = FloatComplexMatrix (dv(0), dv(1)); + FloatComplex *vec = 
retval.fortran_vec (); + octave_idx_type nel = matrix.numel (); + for (octave_idx_type i = 0; i < nel; i++) + vec[i] = FloatComplex (matrix(i).float_value ()); + } + return retval; + } + NDArray array_value (bool = false) const { @@ -160,6 +217,17 @@ return retval; } + FloatNDArray + float_array_value (bool = false) const + { + FloatNDArray retval (matrix.dims ()); + float *vec = retval.fortran_vec (); + octave_idx_type nel = matrix.numel (); + for (octave_idx_type i = 0; i < nel; i++) + vec[i] = matrix(i).float_value (); + return retval; + } + ComplexNDArray complex_array_value (bool = false) const { @@ -171,6 +239,17 @@ return retval; } + FloatComplexNDArray + float_complex_array_value (bool = false) const + { + FloatComplexNDArray retval (matrix.dims ()); + FloatComplex *vec = retval.fortran_vec (); + octave_idx_type nel = matrix.numel (); + for (octave_idx_type i = 0; i < nel; i++) + vec[i] = FloatComplex (matrix(i).float_value ()); + return retval; + } + boolNDArray bool_array_value (bool warn = false) const { @@ -403,8 +482,12 @@ double double_value (bool = false) const { return scalar.double_value (); } + float float_value (bool = false) const { return scalar.float_value (); } + double scalar_value (bool = false) const { return scalar.double_value (); } + float float_scalar_value (bool = false) const { return scalar.float_value (); } + Matrix matrix_value (bool = false) const { @@ -413,6 +496,14 @@ return retval; } + FloatMatrix + float_matrix_value (bool = false) const + { + FloatMatrix retval (1, 1); + retval(0,0) = scalar.float_value (); + return retval; + } + ComplexMatrix complex_matrix_value (bool = false) const { @@ -421,6 +512,13 @@ return retval; } + FloatComplexMatrix + float_complex_matrix_value (bool = false) const + { + FloatComplexMatrix retval (1, 1); + retval(0,0) = FloatComplex (scalar.float_value ()); + return retval; + } NDArray array_value (bool = false) const @@ -430,11 +528,27 @@ return retval; } + FloatNDArray + float_array_value (bool 
= false) const + { + FloatNDArray retval (dim_vector (1, 1)); + retval(0) = scalar.float_value (); + return retval; + } + ComplexNDArray complex_array_value (bool = false) const { ComplexNDArray retval (dim_vector (1, 1)); - retval(0) = Complex (scalar.double_value ()); + retval(0) = Complex (scalar.double_value ()); + return retval; + } + + FloatComplexNDArray + float_complex_array_value (bool = false) const + { + FloatComplexNDArray retval (dim_vector (1, 1)); + retval(0) = FloatComplex (scalar.float_value ()); return retval; } diff -r 45f5faba05a2 -r 82be108cc558 src/ov-range.cc --- a/src/ov-range.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-range.cc Sun Apr 27 22:34:17 2008 +0200 @@ -147,6 +147,26 @@ return retval; } +float +octave_range::float_value (bool) const +{ + float retval = lo_ieee_float_nan_value (); + + octave_idx_type nel = range.nelem (); + + if (nel > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "range", "real scalar"); + + retval = range.base (); + } + else + gripe_invalid_conversion ("range", "real scalar"); + + return retval; +} + octave_value octave_range::all (int dim) const { @@ -206,6 +226,28 @@ return retval; } +FloatComplex +octave_range::float_complex_value (bool) const +{ + float tmp = lo_ieee_float_nan_value (); + + FloatComplex retval (tmp, tmp); + + octave_idx_type nel = range.nelem (); + + if (nel > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "range", "complex scalar"); + + retval = range.base (); + } + else + gripe_invalid_conversion ("range", "complex scalar"); + + return retval; +} + octave_value octave_range::resize (const dim_vector& dv, bool fill) const { diff -r 45f5faba05a2 -r 82be108cc558 src/ov-range.h --- a/src/ov-range.h Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-range.h Sun Apr 27 22:34:17 2008 +0200 @@ -165,15 +165,26 @@ double double_value (bool = false) const; + float float_value (bool = false) const; + double scalar_value (bool frc_str_conv = false) const { return 
double_value (frc_str_conv); } + float float_scalar_value (bool frc_str_conv = false) const + { return float_value (frc_str_conv); } + Matrix matrix_value (bool = false) const { return range.matrix_value (); } + FloatMatrix float_matrix_value (bool = false) const + { return range.matrix_value (); } + NDArray array_value (bool = false) const { return range.matrix_value (); } + FloatNDArray float_array_value (bool = false) const + { return FloatMatrix (range.matrix_value ()); } + // FIXME -- it would be better to have Range::intXNDArray_value // functions to avoid the intermediate conversion to a matrix // object. @@ -210,6 +221,8 @@ Complex complex_value (bool = false) const; + FloatComplex float_complex_value (bool = false) const; + boolNDArray bool_array_value (bool warn = false) const { Matrix m = range.matrix_value (); @@ -223,9 +236,15 @@ ComplexMatrix complex_matrix_value (bool = false) const { return ComplexMatrix (range.matrix_value ()); } + FloatComplexMatrix float_complex_matrix_value (bool = false) const + { return FloatComplexMatrix (range.matrix_value ()); } + ComplexNDArray complex_array_value (bool = false) const { return ComplexMatrix (range.matrix_value ()); } + FloatComplexNDArray float_complex_array_value (bool = false) const + { return FloatComplexMatrix (range.matrix_value ()); } + Range range_value (void) const { return range; } octave_value convert_to_str_internal (bool pad, bool force, char type) const; diff -r 45f5faba05a2 -r 82be108cc558 src/ov-re-mat.cc --- a/src/ov-re-mat.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-re-mat.cc Sun Apr 27 22:34:17 2008 +0200 @@ -50,6 +50,9 @@ #include "ov-base-mat.cc" #include "ov-scalar.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" +#include "ov-complex.h" +#include "ov-cx-mat.h" #include "ov-re-sparse.h" #include "ov-type-conv.h" #include "pr-output.h" @@ -70,6 +73,20 @@ DEFINE_OV_TYPEID_FUNCTIONS_AND_DATA (octave_matrix, "matrix", "double"); +static octave_base_value * 
+default_numeric_demotion_function (const octave_base_value& a) +{ + CAST_CONV_ARG (const octave_matrix&); + + return new octave_float_matrix (v.float_matrix_value ()); +} + +octave_base_value::type_conv_fcn +octave_matrix::numeric_demotion_function (void) const +{ + return default_numeric_demotion_function; +} + octave_base_value * octave_matrix::try_narrowing_conversion (void) { @@ -106,6 +123,24 @@ return retval; } +float +octave_matrix::float_value (bool) const +{ + float retval = lo_ieee_float_nan_value (); + + if (numel () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "real matrix", "real scalar"); + + retval = matrix (0, 0); + } + else + gripe_invalid_conversion ("real matrix", "real scalar"); + + return retval; +} + // FIXME Matrix @@ -114,6 +149,12 @@ return matrix.matrix_value (); } +FloatMatrix +octave_matrix::float_matrix_value (bool) const +{ + return FloatMatrix (matrix.matrix_value ()); +} + Complex octave_matrix::complex_value (bool) const { @@ -134,6 +175,26 @@ return retval; } +FloatComplex +octave_matrix::float_complex_value (bool) const +{ + float tmp = lo_ieee_float_nan_value (); + + FloatComplex retval (tmp, tmp); + + if (rows () > 0 && columns () > 0) + { + gripe_implicit_conversion ("Octave:array-as-scalar", + "real matrix", "complex scalar"); + + retval = matrix (0, 0); + } + else + gripe_invalid_conversion ("real matrix", "complex scalar"); + + return retval; +} + // FIXME ComplexMatrix @@ -142,12 +203,24 @@ return ComplexMatrix (matrix.matrix_value ()); } +FloatComplexMatrix +octave_matrix::float_complex_matrix_value (bool) const +{ + return FloatComplexMatrix (matrix.matrix_value ()); +} + ComplexNDArray octave_matrix::complex_array_value (bool) const { return ComplexNDArray (matrix); } +FloatComplexNDArray +octave_matrix::float_complex_array_value (bool) const +{ + return FloatComplexNDArray (matrix); +} + boolNDArray octave_matrix::bool_array_value (bool warn) const { @@ -766,7 +839,18 @@ { if 
(args(0).is_sparse_type ()) { - OCTAVE_TYPE_CONV_BODY3 (double, octave_sparse_matrix, octave_scalar); + if (args(0).is_complex_type ()) + { + OCTAVE_TYPE_CONV_BODY3 (double, octave_sparse_complex_matrix, octave_complex); + } + else + { + OCTAVE_TYPE_CONV_BODY3 (double, octave_sparse_matrix, octave_scalar); + } + } + else if (args(0).is_complex_type ()) + { + OCTAVE_TYPE_CONV_BODY3 (double, octave_complex_matrix, octave_complex); } else { diff -r 45f5faba05a2 -r 82be108cc558 src/ov-re-mat.h --- a/src/ov-re-mat.h Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-re-mat.h Sun Apr 27 22:34:17 2008 +0200 @@ -87,6 +87,8 @@ octave_base_value *clone (void) const { return new octave_matrix (*this); } octave_base_value *empty_clone (void) const { return new octave_matrix (); } + type_conv_fcn numeric_demotion_function (void) const; + octave_base_value *try_narrowing_conversion (void); idx_vector index_vector (void) const { return idx_vector (matrix); } @@ -127,23 +129,35 @@ double double_value (bool = false) const; + float float_value (bool = false) const; + double scalar_value (bool frc_str_conv = false) const { return double_value (frc_str_conv); } Matrix matrix_value (bool = false) const; + FloatMatrix float_matrix_value (bool = false) const; + Complex complex_value (bool = false) const; + FloatComplex float_complex_value (bool = false) const; + ComplexMatrix complex_matrix_value (bool = false) const; + FloatComplexMatrix float_complex_matrix_value (bool = false) const; + ComplexNDArray complex_array_value (bool = false) const; + FloatComplexNDArray float_complex_array_value (bool = false) const; + boolNDArray bool_array_value (bool warn = false) const; charNDArray char_array_value (bool = false) const; NDArray array_value (bool = false) const { return matrix; } + FloatNDArray float_array_value (bool = false) const { return matrix; } + SparseMatrix sparse_matrix_value (bool = false) const; SparseComplexMatrix sparse_complex_matrix_value (bool = false) const; diff -r 
45f5faba05a2 -r 82be108cc558 src/ov-scalar.cc --- a/src/ov-scalar.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-scalar.cc Sun Apr 27 22:34:17 2008 +0200 @@ -37,6 +37,7 @@ #include "oct-obj.h" #include "oct-stream.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-base.h" #include "ov-base-scalar.h" #include "ov-base-scalar.cc" @@ -45,6 +46,7 @@ #include "pr-output.h" #include "xdiv.h" #include "xpow.h" +#include "ops.h" #include "ls-oct-ascii.h" #include "ls-hdf5.h" @@ -55,6 +57,20 @@ DEFINE_OV_TYPEID_FUNCTIONS_AND_DATA (octave_scalar, "scalar", "double"); +static octave_base_value * +default_numeric_demotion_function (const octave_base_value& a) +{ + CAST_CONV_ARG (const octave_scalar&); + + return new octave_float_scalar (v.float_value ()); +} + +octave_base_value::type_conv_fcn +octave_scalar::numeric_demotion_function (void) const +{ + return default_numeric_demotion_function; +} + octave_value octave_scalar::do_index_op (const octave_value_list& idx, bool resize_ok) { diff -r 45f5faba05a2 -r 82be108cc558 src/ov-scalar.h --- a/src/ov-scalar.h Wed May 14 18:09:56 2008 +0200 +++ b/src/ov-scalar.h Sun Apr 27 22:34:17 2008 +0200 @@ -76,6 +76,8 @@ octave_value do_index_op (const octave_value_list& idx, bool resize_ok = false); + type_conv_fcn numeric_demotion_function (void) const; + idx_vector index_vector (void) const { return idx_vector (scalar); } octave_value any (int = 0) const @@ -137,14 +139,24 @@ double double_value (bool = false) const { return scalar; } + float float_value (bool = false) const { return static_cast<float> (scalar); } + double scalar_value (bool = false) const { return scalar; } + float float_scalar_value (bool = false) const { return static_cast<float> (scalar); } + Matrix matrix_value (bool = false) const { return Matrix (1, 1, scalar); } + FloatMatrix float_matrix_value (bool = false) const + { return FloatMatrix (1, 1, scalar); } + NDArray array_value (bool = false) const { return NDArray (dim_vector (1, 1), scalar); } + FloatNDArray 
float_array_value (bool = false) const + { return FloatNDArray (dim_vector (1, 1), scalar); } + SparseMatrix sparse_matrix_value (bool = false) const { return SparseMatrix (Matrix (1, 1, scalar)); } @@ -156,12 +168,20 @@ Complex complex_value (bool = false) const { return scalar; } + FloatComplex float_complex_value (bool = false) const { return scalar; } + ComplexMatrix complex_matrix_value (bool = false) const { return ComplexMatrix (1, 1, Complex (scalar)); } + FloatComplexMatrix float_complex_matrix_value (bool = false) const + { return FloatComplexMatrix (1, 1, FloatComplex (scalar)); } + ComplexNDArray complex_array_value (bool = false) const { return ComplexNDArray (dim_vector (1, 1), Complex (scalar)); } + FloatComplexNDArray float_complex_array_value (bool = false) const + { return FloatComplexNDArray (dim_vector (1, 1), FloatComplex (scalar)); } + charNDArray char_array_value (bool = false) const { diff -r 45f5faba05a2 -r 82be108cc558 src/ov.cc --- a/src/ov.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/ov.cc Sun Apr 27 22:34:17 2008 +0200 @@ -37,7 +37,9 @@ #include "ov-bool-mat.h" #include "ov-cell.h" #include "ov-scalar.h" +#include "ov-float.h" #include "ov-re-mat.h" +#include "ov-flt-re-mat.h" #include "ov-bool-sparse.h" #include "ov-cx-sparse.h" #include "ov-re-sparse.h" @@ -50,7 +52,9 @@ #include "ov-uint32.h" #include "ov-uint64.h" #include "ov-complex.h" +#include "ov-flt-complex.h" #include "ov-cx-mat.h" +#include "ov-flt-cx-mat.h" #include "ov-ch-mat.h" #include "ov-str-mat.h" #include "ov-range.h" @@ -478,6 +482,11 @@ { } +octave_value::octave_value (float d) + : rep (new octave_float_scalar (d)) +{ +} + octave_value::octave_value (const Cell& c, bool is_csl) : rep (is_csl ? 
dynamic_cast (new octave_cs_list (c)) @@ -498,78 +507,156 @@ maybe_mutate (); } +octave_value::octave_value (const FloatMatrix& m, const MatrixType& t) + : rep (new octave_float_matrix (m, t)) +{ + maybe_mutate (); +} + octave_value::octave_value (const NDArray& a) : rep (new octave_matrix (a)) { maybe_mutate (); } +octave_value::octave_value (const FloatNDArray& a) + : rep (new octave_float_matrix (a)) +{ + maybe_mutate (); +} + octave_value::octave_value (const ArrayN& a) : rep (new octave_matrix (a)) { maybe_mutate (); } +octave_value::octave_value (const ArrayN& a) + : rep (new octave_float_matrix (a)) +{ + maybe_mutate (); +} + octave_value::octave_value (const DiagMatrix& d) : rep (new octave_matrix (d)) { maybe_mutate (); } +octave_value::octave_value (const FloatDiagMatrix& d) + : rep (new octave_float_matrix (d)) +{ + maybe_mutate (); +} + octave_value::octave_value (const RowVector& v) : rep (new octave_matrix (v)) { maybe_mutate (); } +octave_value::octave_value (const FloatRowVector& v) + : rep (new octave_float_matrix (v)) +{ + maybe_mutate (); +} + octave_value::octave_value (const ColumnVector& v) : rep (new octave_matrix (v)) { maybe_mutate (); } +octave_value::octave_value (const FloatColumnVector& v) + : rep (new octave_float_matrix (v)) +{ + maybe_mutate (); +} + octave_value::octave_value (const Complex& C) : rep (new octave_complex (C)) { maybe_mutate (); } +octave_value::octave_value (const FloatComplex& C) + : rep (new octave_float_complex (C)) +{ + maybe_mutate (); +} + octave_value::octave_value (const ComplexMatrix& m, const MatrixType& t) : rep (new octave_complex_matrix (m, t)) { maybe_mutate (); } +octave_value::octave_value (const FloatComplexMatrix& m, const MatrixType& t) + : rep (new octave_float_complex_matrix (m, t)) +{ + maybe_mutate (); +} + octave_value::octave_value (const ComplexNDArray& a) : rep (new octave_complex_matrix (a)) { maybe_mutate (); } +octave_value::octave_value (const FloatComplexNDArray& a) + : rep (new 
octave_float_complex_matrix (a)) +{ + maybe_mutate (); +} + octave_value::octave_value (const ArrayN& a) : rep (new octave_complex_matrix (a)) { maybe_mutate (); } +octave_value::octave_value (const ArrayN& a) + : rep (new octave_float_complex_matrix (a)) +{ + maybe_mutate (); +} + octave_value::octave_value (const ComplexDiagMatrix& d) : rep (new octave_complex_matrix (d)) { maybe_mutate (); } +octave_value::octave_value (const FloatComplexDiagMatrix& d) + : rep (new octave_complex_matrix (d)) +{ + maybe_mutate (); +} + octave_value::octave_value (const ComplexRowVector& v) : rep (new octave_complex_matrix (v)) { maybe_mutate (); } +octave_value::octave_value (const FloatComplexRowVector& v) + : rep (new octave_float_complex_matrix (v)) +{ + maybe_mutate (); +} + octave_value::octave_value (const ComplexColumnVector& v) : rep (new octave_complex_matrix (v)) { maybe_mutate (); } +octave_value::octave_value (const FloatComplexColumnVector& v) + : rep (new octave_float_complex_matrix (v)) +{ + maybe_mutate (); +} + octave_value::octave_value (bool b) : rep (new octave_bool (b)) { @@ -1497,6 +1584,231 @@ return retval; } +FloatColumnVector +octave_value::float_column_vector_value (bool force_string_conv, + bool /* frc_vec_conv */) const +{ + FloatColumnVector retval; + + FloatMatrix m = float_matrix_value (force_string_conv); + + if (error_state) + return retval; + + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.columns (); + + if (nc == 1) + { + retval.resize (nr); + for (octave_idx_type i = 0; i < nr; i++) + retval (i) = m (i, 0); + } + else + { + std::string tn = type_name (); + gripe_invalid_conversion (tn.c_str (), "real column vector"); + } + + return retval; +} + +FloatComplexColumnVector +octave_value::float_complex_column_vector_value (bool force_string_conv, + bool /* frc_vec_conv */) const +{ + FloatComplexColumnVector retval; + + FloatComplexMatrix m = float_complex_matrix_value (force_string_conv); + + if (error_state) + return retval; + + 
octave_idx_type nr = m.rows (); + octave_idx_type nc = m.columns (); + + if (nc == 1) + { + retval.resize (nr); + for (octave_idx_type i = 0; i < nr; i++) + retval (i) = m (i, 0); + } + else + { + std::string tn = type_name (); + gripe_invalid_conversion (tn.c_str (), "complex column vector"); + } + + return retval; +} + +FloatRowVector +octave_value::float_row_vector_value (bool force_string_conv, + bool /* frc_vec_conv */) const +{ + FloatRowVector retval; + + FloatMatrix m = float_matrix_value (force_string_conv); + + if (error_state) + return retval; + + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.columns (); + + if (nr == 1) + { + retval.resize (nc); + for (octave_idx_type i = 0; i < nc; i++) + retval (i) = m (0, i); + } + else + { + std::string tn = type_name (); + gripe_invalid_conversion (tn.c_str (), "real row vector"); + } + + return retval; +} + +FloatComplexRowVector +octave_value::float_complex_row_vector_value (bool force_string_conv, + bool /* frc_vec_conv */) const +{ + FloatComplexRowVector retval; + + FloatComplexMatrix m = float_complex_matrix_value (force_string_conv); + + if (error_state) + return retval; + + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.columns (); + + if (nr == 1) + { + retval.resize (nc); + for (octave_idx_type i = 0; i < nc; i++) + retval (i) = m (0, i); + } + else + { + std::string tn = type_name (); + gripe_invalid_conversion (tn.c_str (), "complex row vector"); + } + + return retval; +} + +// Sloppy... 
+ +Array +octave_value::float_vector_value (bool force_string_conv, + bool force_vector_conversion) const +{ + Array retval; + + FloatMatrix m = float_matrix_value (force_string_conv); + + if (error_state) + return retval; + + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.columns (); + + if (nr == 1) + { + retval.resize (nc); + for (octave_idx_type i = 0; i < nc; i++) + retval (i) = m (0, i); + } + else if (nc == 1) + { + retval.resize (nr); + for (octave_idx_type i = 0; i < nr; i++) + retval (i) = m (i, 0); + } + else if (nr > 0 && nc > 0) + { + if (! force_vector_conversion) + gripe_implicit_conversion ("Octave:array-as-vector", + type_name (), "real vector"); + + retval.resize (nr * nc); + octave_idx_type k = 0; + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + + retval (k++) = m (i, j); + } + } + else + { + std::string tn = type_name (); + gripe_invalid_conversion (tn.c_str (), "real vector"); + } + + return retval; +} + +Array +octave_value::float_complex_vector_value (bool force_string_conv, + bool force_vector_conversion) const +{ + Array retval; + + FloatComplexMatrix m = float_complex_matrix_value (force_string_conv); + + if (error_state) + return retval; + + octave_idx_type nr = m.rows (); + octave_idx_type nc = m.columns (); + + if (nr == 1) + { + retval.resize (nc); + for (octave_idx_type i = 0; i < nc; i++) + { + OCTAVE_QUIT; + retval (i) = m (0, i); + } + } + else if (nc == 1) + { + retval.resize (nr); + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + retval (i) = m (i, 0); + } + } + else if (nr > 0 && nc > 0) + { + if (! 
force_vector_conversion) + gripe_implicit_conversion ("Octave:array-as-vector", + type_name (), "complex vector"); + + retval.resize (nr * nc); + octave_idx_type k = 0; + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + retval (k++) = m (i, j); + } + } + else + { + std::string tn = type_name (); + gripe_invalid_conversion (tn.c_str (), "complex vector"); + } + + return retval; +} + int octave_value::write (octave_stream& os, int block_size, oct_data_conv::data_type output_type, int skip, @@ -1631,12 +1943,132 @@ } } else + { + //demote double -> single and try again + cf1 = tv1.numeric_demotion_function (); + + if (cf1) + { + octave_base_value *tmp = cf1 (*tv1.rep); + + if (tmp) + { + tv1 = octave_value (tmp); + t1 = tv1.type_id (); + } + else + { + gripe_binary_op_conv (octave_value::binary_op_as_string (op)); + return retval; + } + } + + cf2 = tv2.numeric_demotion_function (); + + if (cf2) + { + octave_base_value *tmp = cf2 (*tv2.rep); + + if (tmp) + { + tv2 = octave_value (tmp); + t2 = tv2.type_id (); + } + else + { + gripe_binary_op_conv (octave_value::binary_op_as_string (op)); + return retval; + } + } + + if (cf1 || cf2) + { + f = octave_value_typeinfo::lookup_binary_op (op, t1, t2); + + if (f) + { + try + { + retval = f (*tv1.rep, *tv2.rep); + } + catch (octave_execution_exception) + { + octave_exception_state = octave_no_exception; + error ("caught execution error in library function"); + } + } + else + gripe_binary_op (octave_value::binary_op_as_string (op), + v1.type_name (), v2.type_name ()); + } + else + gripe_binary_op (octave_value::binary_op_as_string (op), + v1.type_name (), v2.type_name ()); + } + } + else + { + //demote double -> single and try again + cf1 = tv1.numeric_demotion_function (); + + if (cf1) + { + octave_base_value *tmp = cf1 (*tv1.rep); + + if (tmp) + { + tv1 = octave_value (tmp); + t1 = tv1.type_id (); + } + else + { + gripe_binary_op_conv (octave_value::binary_op_as_string 
(op)); + return retval; + } + } + + cf2 = tv2.numeric_demotion_function (); + + if (cf2) + { + octave_base_value *tmp = cf2 (*tv2.rep); + + if (tmp) + { + tv2 = octave_value (tmp); + t2 = tv2.type_id (); + } + else + { + gripe_binary_op_conv (octave_value::binary_op_as_string (op)); + return retval; + } + } + + if (cf1 || cf2) + { + f = octave_value_typeinfo::lookup_binary_op (op, t1, t2); + + if (f) + { + try + { + retval = f (*tv1.rep, *tv2.rep); + } + catch (octave_execution_exception) + { + octave_exception_state = octave_no_exception; + error ("caught execution error in library function"); + } + } + else + gripe_binary_op (octave_value::binary_op_as_string (op), + v1.type_name (), v2.type_name ()); + } + else gripe_binary_op (octave_value::binary_op_as_string (op), v1.type_name (), v2.type_name ()); } - else - gripe_binary_op (octave_value::binary_op_as_string (op), - v1.type_name (), v2.type_name ()); } } @@ -2183,6 +2615,10 @@ octave_fcn_handle::register_type (); octave_fcn_inline::register_type (); octave_streamoff::register_type (); + octave_float_scalar::register_type (); + octave_float_complex::register_type (); + octave_float_matrix::register_type (); + octave_float_complex_matrix::register_type (); } #if 0 diff -r 45f5faba05a2 -r 82be108cc558 src/ov.h --- a/src/ov.h Wed May 14 18:09:56 2008 +0200 +++ b/src/ov.h Sun Apr 27 22:34:17 2008 +0200 @@ -167,21 +167,35 @@ octave_value (octave_time t); octave_value (double d); + octave_value (float d); octave_value (const ArrayN& a, bool is_cs_list = false); octave_value (const Cell& c, bool is_cs_list = false); octave_value (const Matrix& m, const MatrixType& t = MatrixType()); + octave_value (const FloatMatrix& m, const MatrixType& t = MatrixType()); octave_value (const NDArray& nda); + octave_value (const FloatNDArray& nda); octave_value (const ArrayN& m); + octave_value (const ArrayN& m); octave_value (const DiagMatrix& d); + octave_value (const FloatDiagMatrix& d); octave_value (const RowVector& v); + 
octave_value (const FloatRowVector& v); octave_value (const ColumnVector& v); + octave_value (const FloatColumnVector& v); octave_value (const Complex& C); + octave_value (const FloatComplex& C); octave_value (const ComplexMatrix& m, const MatrixType& t = MatrixType()); + octave_value (const FloatComplexMatrix& m, const MatrixType& t = MatrixType()); octave_value (const ComplexNDArray& cnda); + octave_value (const FloatComplexNDArray& cnda); octave_value (const ArrayN& m); + octave_value (const ArrayN& m); octave_value (const ComplexDiagMatrix& d); + octave_value (const FloatComplexDiagMatrix& d); octave_value (const ComplexRowVector& v); + octave_value (const FloatComplexRowVector& v); octave_value (const ComplexColumnVector& v); + octave_value (const FloatComplexColumnVector& v); octave_value (bool b); octave_value (const boolMatrix& bm, const MatrixType& t = MatrixType()); octave_value (const boolNDArray& bnda); @@ -295,6 +309,9 @@ octave_base_value::type_conv_fcn numeric_conversion_function (void) const { return rep->numeric_conversion_function (); } + octave_base_value::type_conv_fcn numeric_demotion_function (void) const + { return rep->numeric_demotion_function (); } + void maybe_mutate (void); octave_value squeeze (void) const @@ -628,26 +645,47 @@ double double_value (bool frc_str_conv = false) const { return rep->double_value (frc_str_conv); } + float float_value (bool frc_str_conv = false) const + { return rep->float_value (frc_str_conv); } + double scalar_value (bool frc_str_conv = false) const { return rep->scalar_value (frc_str_conv); } + float float_scalar_value (bool frc_str_conv = false) const + { return rep->float_scalar_value (frc_str_conv); } + Cell cell_value (void) const; Matrix matrix_value (bool frc_str_conv = false) const { return rep->matrix_value (frc_str_conv); } + FloatMatrix float_matrix_value (bool frc_str_conv = false) const + { return rep->float_matrix_value (frc_str_conv); } + NDArray array_value (bool frc_str_conv = false) const { 
return rep->array_value (frc_str_conv); } + FloatNDArray float_array_value (bool frc_str_conv = false) const + { return rep->float_array_value (frc_str_conv); } + Complex complex_value (bool frc_str_conv = false) const { return rep->complex_value (frc_str_conv); } + FloatComplex float_complex_value (bool frc_str_conv = false) const + { return rep->float_complex_value (frc_str_conv); } + ComplexMatrix complex_matrix_value (bool frc_str_conv = false) const { return rep->complex_matrix_value (frc_str_conv); } + FloatComplexMatrix float_complex_matrix_value (bool frc_str_conv = false) const + { return rep->float_complex_matrix_value (frc_str_conv); } + ComplexNDArray complex_array_value (bool frc_str_conv = false) const { return rep->complex_array_value (frc_str_conv); } + FloatComplexNDArray float_complex_array_value (bool frc_str_conv = false) const + { return rep->float_complex_array_value (frc_str_conv); } + bool bool_value (bool warn = false) const { return rep->bool_value (warn); } @@ -768,6 +806,24 @@ complex_row_vector_value (bool frc_str_conv = false, bool frc_vec_conv = false) const; + + FloatColumnVector float_column_vector_value (bool frc_str_conv = false, + bool frc_vec_conv = false) const; + + FloatComplexColumnVector + float_complex_column_vector_value (bool frc_str_conv = false, + bool frc_vec_conv = false) const; + + FloatRowVector float_row_vector_value (bool frc_str_conv = false, + bool frc_vec_conv = false) const; + + FloatComplexRowVector + float_complex_row_vector_value (bool frc_str_conv = false, + bool frc_vec_conv = false) const; + + + + Array int_vector_value (bool req_int = false, bool frc_str_conv = false, bool frc_vec_conv = false) const; @@ -778,6 +834,12 @@ Array complex_vector_value (bool frc_str_conv = false, bool frc_vec_conv = false) const; + Array float_vector_value (bool frc_str_conv = false, + bool frc_vec_conv = false) const; + + Array float_complex_vector_value (bool frc_str_conv = false, + bool frc_vec_conv = false) const; + // 
Conversions. These should probably be private. If a user of this // class wants a certain kind of constant, he should simply ask for // it, and we should convert it if possible. @@ -1112,6 +1174,7 @@ OCTAVE_ARRAY_TYPE_TRAIT (int64NDArray, octave_int64); OCTAVE_ARRAY_TYPE_TRAIT (uint64NDArray, octave_uint64); OCTAVE_ARRAY_TYPE_TRAIT (NDArray, double); +OCTAVE_ARRAY_TYPE_TRAIT (FloatNDArray, float); // This will eventually go away, but for now it can be used to // simplify the transition to the new octave_value class hierarchy, diff -r 45f5faba05a2 -r 82be108cc558 src/pr-output.cc --- a/src/pr-output.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/pr-output.cc Sun Apr 27 22:34:17 2008 +0200 @@ -1972,6 +1972,55 @@ } void +octave_print_internal (std::ostream& os, bool d, bool pr_as_read_syntax) +{ + octave_print_internal (os, double (d), pr_as_read_syntax); +} + +// FIXME: Write single precision versions of the printing functions + +void +octave_print_internal (std::ostream& os, float d, bool pr_as_read_syntax) +{ + octave_print_internal (os, double (d), pr_as_read_syntax); +} + +void +octave_print_internal (std::ostream& os, const FloatMatrix& m, + bool pr_as_read_syntax, int extra_indent) +{ + octave_print_internal (os, Matrix (m), pr_as_read_syntax, extra_indent); +} + +void +octave_print_internal (std::ostream& os, const FloatNDArray& nda, + bool pr_as_read_syntax, int extra_indent) +{ + octave_print_internal (os, NDArray (nda), pr_as_read_syntax, extra_indent); +} + +void +octave_print_internal (std::ostream& os, const FloatComplex& c, + bool pr_as_read_syntax) +{ + octave_print_internal (os, Complex (c), pr_as_read_syntax); +} + +void +octave_print_internal (std::ostream& os, const FloatComplexMatrix& cm, + bool pr_as_read_syntax, int extra_indent) +{ + octave_print_internal (os, ComplexMatrix (cm), pr_as_read_syntax, extra_indent); +} + +void +octave_print_internal (std::ostream& os, const FloatComplexNDArray& nda, + bool pr_as_read_syntax, int extra_indent) +{ + 
octave_print_internal (os, ComplexNDArray (nda), pr_as_read_syntax, extra_indent); +} + +void octave_print_internal (std::ostream& os, const Range& r, bool pr_as_read_syntax, int extra_indent) { diff -r 45f5faba05a2 -r 82be108cc558 src/pr-output.h --- a/src/pr-output.h Wed May 14 18:09:56 2008 +0200 +++ b/src/pr-output.h Sun Apr 27 22:34:17 2008 +0200 @@ -30,9 +30,13 @@ template class ArrayN; class ComplexMatrix; +class FloatComplexMatrix; class ComplexNDArray; +class FloatComplexNDArray; class Matrix; +class FloatMatrix; class NDArray; +class FloatNDArray; class Range; class boolMatrix; class boolNDArray; @@ -43,35 +47,68 @@ #include "intNDArray.h" #include "oct-inttypes.h" + +extern OCTINTERP_API void +octave_print_internal (std::ostream& os, bool d, + bool pr_as_read_syntax = false); + extern OCTINTERP_API void octave_print_internal (std::ostream& os, double d, bool pr_as_read_syntax = false); extern OCTINTERP_API void +octave_print_internal (std::ostream& os, float d, + bool pr_as_read_syntax = false); + +extern OCTINTERP_API void octave_print_internal (std::ostream& os, const Matrix& m, bool pr_as_read_syntax = false, int extra_indent = 0); extern OCTINTERP_API void +octave_print_internal (std::ostream& os, const FloatMatrix& m, + bool pr_as_read_syntax = false, + int extra_indent = 0); + +extern OCTINTERP_API void octave_print_internal (std::ostream& os, const NDArray& nda, bool pr_as_read_syntax = false, int extra_indent = 0); extern OCTINTERP_API void +octave_print_internal (std::ostream& os, const FloatNDArray& nda, + bool pr_as_read_syntax = false, + int extra_indent = 0); + +extern OCTINTERP_API void octave_print_internal (std::ostream& os, const Complex& c, bool pr_as_read_syntax = false); extern OCTINTERP_API void +octave_print_internal (std::ostream& os, const FloatComplex& c, + bool pr_as_read_syntax = false); + +extern OCTINTERP_API void octave_print_internal (std::ostream& os, const ComplexMatrix& cm, bool pr_as_read_syntax = false, int 
extra_indent = 0); extern OCTINTERP_API void +octave_print_internal (std::ostream& os, const FloatComplexMatrix& cm, + bool pr_as_read_syntax = false, + int extra_indent = 0); + +extern OCTINTERP_API void octave_print_internal (std::ostream& os, const ComplexNDArray& nda, bool pr_as_read_syntax = false, int extra_indent = 0); extern OCTINTERP_API void +octave_print_internal (std::ostream& os, const FloatComplexNDArray& nda, + bool pr_as_read_syntax = false, + int extra_indent = 0); + +extern OCTINTERP_API void octave_print_internal (std::ostream& os, const Range& r, bool pr_as_read_syntax = false, int extra_indent = 0); diff -r 45f5faba05a2 -r 82be108cc558 src/pt-mat.cc --- a/src/pt-mat.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/pt-mat.cc Sun Apr 27 22:34:17 2008 +0200 @@ -806,9 +806,14 @@ DO_SINGLE_TYPE_CONCAT (ComplexNDArray, complex_array_value); } } -#if 0 else if (result_type == "single") -#endif + { + if (all_real_p) + DO_SINGLE_TYPE_CONCAT (FloatNDArray, float_array_value); + else + DO_SINGLE_TYPE_CONCAT (FloatComplexNDArray, + float_complex_array_value); + } else if (result_type == "char") { char type = all_dq_strings_p ? '"' : '\''; diff -r 45f5faba05a2 -r 82be108cc558 src/utils.cc --- a/src/utils.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/utils.cc Sun Apr 27 22:34:17 2008 +0200 @@ -899,6 +899,22 @@ return m; } +FloatMatrix +float_identity_matrix (octave_idx_type nr, octave_idx_type nc) +{ + FloatMatrix m (nr, nc, 0.0); + + if (nr > 0 && nc > 0) + { + octave_idx_type n = std::min (nr, nc); + + for (octave_idx_type i = 0; i < n; i++) + m (i, i) = 1.0; + } + + return m; +} + extern int octave_format (std::ostream& os, const char *fmt, ...) 
{ diff -r 45f5faba05a2 -r 82be108cc558 src/utils.h --- a/src/utils.h Wed May 14 18:09:56 2008 +0200 +++ b/src/utils.h Sun Apr 27 22:34:17 2008 +0200 @@ -93,6 +93,9 @@ extern OCTINTERP_API Matrix identity_matrix (octave_idx_type nr, octave_idx_type nc); +extern OCTINTERP_API FloatMatrix +float_identity_matrix (octave_idx_type nr, octave_idx_type nc); + extern OCTINTERP_API int octave_format (std::ostream& os, const char *fmt, ...); diff -r 45f5faba05a2 -r 82be108cc558 src/xdiv.cc --- a/src/xdiv.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/xdiv.cc Sun Apr 27 22:34:17 2008 +0200 @@ -32,6 +32,10 @@ #include "dMatrix.h" #include "CNDArray.h" #include "dNDArray.h" +#include "fCMatrix.h" +#include "fMatrix.h" +#include "fCNDArray.h" +#include "fNDArray.h" #include "oct-cmplx.h" #include "quit.h" @@ -404,6 +408,320 @@ return a.solve (typ, b, info, rcond, solve_singularity_warning); } +static void +solve_singularity_warning (float rcond) +{ + warning ("matrix singular to machine precision, rcond = %g", rcond); + warning ("attempting to find minimum norm solution"); +} + +INSTANTIATE_MX_LEFTDIV_CONFORM (FloatMatrix, FloatMatrix); +INSTANTIATE_MX_LEFTDIV_CONFORM (FloatMatrix, FloatComplexMatrix); +INSTANTIATE_MX_LEFTDIV_CONFORM (FloatComplexMatrix, FloatMatrix); +INSTANTIATE_MX_LEFTDIV_CONFORM (FloatComplexMatrix, FloatComplexMatrix); + +INSTANTIATE_MX_DIV_CONFORM (FloatMatrix, FloatMatrix); +INSTANTIATE_MX_DIV_CONFORM (FloatMatrix, FloatComplexMatrix); +INSTANTIATE_MX_DIV_CONFORM (FloatComplexMatrix, FloatMatrix); +INSTANTIATE_MX_DIV_CONFORM (FloatComplexMatrix, FloatComplexMatrix); + +// Right division functions. +// +// op2 / op1: m cm +// +-- +---+----+ +// matrix | 1 | 3 | +// +---+----+ +// complex_matrix | 2 | 4 | +// +---+----+ + +// -*- 1 -*- +FloatMatrix +xdiv (const FloatMatrix& a, const FloatMatrix& b, MatrixType &typ) +{ + if (! 
mx_div_conform (a, b)) + return FloatMatrix (); + + FloatMatrix atmp = a.transpose (); + FloatMatrix btmp = b.transpose (); + MatrixType btyp = typ.transpose (); + + octave_idx_type info; + float rcond = 0.0; + + FloatMatrix result + = btmp.solve (btyp, atmp, info, rcond, solve_singularity_warning); + + typ = btyp.transpose (); + return result.transpose (); +} + +// -*- 2 -*- +FloatComplexMatrix +xdiv (const FloatMatrix& a, const FloatComplexMatrix& b, MatrixType &typ) +{ + if (! mx_div_conform (a, b)) + return FloatComplexMatrix (); + + FloatMatrix atmp = a.transpose (); + FloatComplexMatrix btmp = b.hermitian (); + MatrixType btyp = typ.transpose (); + + octave_idx_type info; + float rcond = 0.0; + + FloatComplexMatrix result + = btmp.solve (btyp, atmp, info, rcond, solve_singularity_warning); + + typ = btyp.transpose (); + return result.hermitian (); +} + +// -*- 3 -*- +FloatComplexMatrix +xdiv (const FloatComplexMatrix& a, const FloatMatrix& b, MatrixType &typ) +{ + if (! mx_div_conform (a, b)) + return FloatComplexMatrix (); + + FloatComplexMatrix atmp = a.hermitian (); + FloatMatrix btmp = b.transpose (); + MatrixType btyp = typ.transpose (); + + octave_idx_type info; + float rcond = 0.0; + + FloatComplexMatrix result + = btmp.solve (btyp, atmp, info, rcond, solve_singularity_warning); + + typ = btyp.transpose (); + return result.hermitian (); +} + +// -*- 4 -*- +FloatComplexMatrix +xdiv (const FloatComplexMatrix& a, const FloatComplexMatrix& b, MatrixType &typ) +{ + if (! mx_div_conform (a, b)) + return FloatComplexMatrix (); + + FloatComplexMatrix atmp = a.hermitian (); + FloatComplexMatrix btmp = b.hermitian (); + MatrixType btyp = typ.transpose (); + + octave_idx_type info; + float rcond = 0.0; + + FloatComplexMatrix result + = btmp.solve (btyp, atmp, info, rcond, solve_singularity_warning); + + typ = btyp.transpose (); + return result.hermitian (); +} + +// Funny element by element division operations. 
+// +// op2 \ op1: s cs +// +-- +---+----+ +// matrix | 1 | 3 | +// +---+----+ +// complex_matrix | 2 | 4 | +// +---+----+ + +FloatMatrix +x_el_div (float a, const FloatMatrix& b) +{ + octave_idx_type nr = b.rows (); + octave_idx_type nc = b.columns (); + + FloatMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = a / b (i, j); + } + + return result; +} + +FloatComplexMatrix +x_el_div (float a, const FloatComplexMatrix& b) +{ + octave_idx_type nr = b.rows (); + octave_idx_type nc = b.columns (); + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = a / b (i, j); + } + + return result; +} + +FloatComplexMatrix +x_el_div (const FloatComplex a, const FloatMatrix& b) +{ + octave_idx_type nr = b.rows (); + octave_idx_type nc = b.columns (); + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = a / b (i, j); + } + + return result; +} + +FloatComplexMatrix +x_el_div (const FloatComplex a, const FloatComplexMatrix& b) +{ + octave_idx_type nr = b.rows (); + octave_idx_type nc = b.columns (); + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = a / b (i, j); + } + + return result; +} + +// Funny element by element division operations. 
+// +// op2 \ op1: s cs +// +-- +---+----+ +// N-d array | 1 | 3 | +// +---+----+ +// complex N-d array | 2 | 4 | +// +---+----+ + +FloatNDArray +x_el_div (float a, const FloatNDArray& b) +{ + FloatNDArray result (b.dims ()); + + for (octave_idx_type i = 0; i < b.length (); i++) + { + OCTAVE_QUIT; + result (i) = a / b (i); + } + + return result; +} + +FloatComplexNDArray +x_el_div (float a, const FloatComplexNDArray& b) +{ + FloatComplexNDArray result (b.dims ()); + + for (octave_idx_type i = 0; i < b.length (); i++) + { + OCTAVE_QUIT; + result (i) = a / b (i); + } + + return result; +} + +FloatComplexNDArray +x_el_div (const FloatComplex a, const FloatNDArray& b) +{ + FloatComplexNDArray result (b.dims ()); + + for (octave_idx_type i = 0; i < b.length (); i++) + { + OCTAVE_QUIT; + result (i) = a / b (i); + } + + return result; +} + +FloatComplexNDArray +x_el_div (const FloatComplex a, const FloatComplexNDArray& b) +{ + FloatComplexNDArray result (b.dims ()); + + for (octave_idx_type i = 0; i < b.length (); i++) + { + OCTAVE_QUIT; + result (i) = a / b (i); + } + + return result; +} + +// Left division functions. +// +// op2 \ op1: m cm +// +-- +---+----+ +// matrix | 1 | 3 | +// +---+----+ +// complex_matrix | 2 | 4 | +// +---+----+ + +// -*- 1 -*- +FloatMatrix +xleftdiv (const FloatMatrix& a, const FloatMatrix& b, MatrixType &typ) +{ + if (! mx_leftdiv_conform (a, b)) + return FloatMatrix (); + + octave_idx_type info; + float rcond = 0.0; + return a.solve (typ, b, info, rcond, solve_singularity_warning); +} + +// -*- 2 -*- +FloatComplexMatrix +xleftdiv (const FloatMatrix& a, const FloatComplexMatrix& b, MatrixType &typ) +{ + if (! mx_leftdiv_conform (a, b)) + return FloatComplexMatrix (); + + octave_idx_type info; + float rcond = 0.0; + + return a.solve (typ, b, info, rcond, solve_singularity_warning); +} + +// -*- 3 -*- +FloatComplexMatrix +xleftdiv (const FloatComplexMatrix& a, const FloatMatrix& b, MatrixType &typ) +{ + if (! 
mx_leftdiv_conform (a, b)) + return FloatComplexMatrix (); + + octave_idx_type info; + float rcond = 0.0; + return a.solve (typ, b, info, rcond, solve_singularity_warning); +} + +// -*- 4 -*- +FloatComplexMatrix +xleftdiv (const FloatComplexMatrix& a, const FloatComplexMatrix& b, MatrixType &typ) +{ + if (! mx_leftdiv_conform (a, b)) + return FloatComplexMatrix (); + + octave_idx_type info; + float rcond = 0.0; + return a.solve (typ, b, info, rcond, solve_singularity_warning); +} + /* ;;; Local Variables: *** ;;; mode: C++ *** diff -r 45f5faba05a2 -r 82be108cc558 src/xdiv.h --- a/src/xdiv.h Wed May 14 18:09:56 2008 +0200 +++ b/src/xdiv.h Sun Apr 27 22:34:17 2008 +0200 @@ -59,6 +59,39 @@ extern ComplexMatrix xleftdiv (const ComplexMatrix& a, const ComplexMatrix& b, MatrixType &typ); +class FloatMatrix; +class FloatComplexMatrix; + +class FloatNDArray; +class FloatComplexNDArray; + +extern FloatMatrix xdiv (const FloatMatrix& a, const FloatMatrix& b, MatrixType &typ); +extern FloatComplexMatrix xdiv (const FloatMatrix& a, const FloatComplexMatrix& b, + MatrixType &typ); +extern FloatComplexMatrix xdiv (const FloatComplexMatrix& a, const FloatMatrix& b, + MatrixType &typ); +extern FloatComplexMatrix xdiv (const FloatComplexMatrix& a, const FloatComplexMatrix& b, + MatrixType &typ); + +extern FloatMatrix x_el_div (float a, const FloatMatrix& b); +extern FloatComplexMatrix x_el_div (float a, const FloatComplexMatrix& b); +extern FloatComplexMatrix x_el_div (const FloatComplex a, const FloatMatrix& b); +extern FloatComplexMatrix x_el_div (const FloatComplex a, const FloatComplexMatrix& b); + +extern FloatNDArray x_el_div (float a, const FloatNDArray& b); +extern FloatComplexNDArray x_el_div (float a, const FloatComplexNDArray& b); +extern FloatComplexNDArray x_el_div (const FloatComplex a, const FloatNDArray& b); +extern FloatComplexNDArray x_el_div (const FloatComplex a, const FloatComplexNDArray& b); + +extern FloatMatrix xleftdiv (const FloatMatrix& a, const 
FloatMatrix& b, MatrixType &typ); +extern FloatComplexMatrix xleftdiv (const FloatMatrix& a, const FloatComplexMatrix& b, + MatrixType &typ); +extern FloatComplexMatrix xleftdiv (const FloatComplexMatrix& a, const FloatMatrix& b, + MatrixType &typ); +extern FloatComplexMatrix xleftdiv (const FloatComplexMatrix& a, const FloatComplexMatrix& b, + MatrixType &typ); + + #endif /* diff -r 45f5faba05a2 -r 82be108cc558 src/xpow.cc --- a/src/xpow.cc Wed May 14 18:09:56 2008 +0200 +++ b/src/xpow.cc Sun Apr 27 22:34:17 2008 +0200 @@ -33,6 +33,7 @@ #include "CDiagMatrix.h" #include "CMatrix.h" #include "EIG.h" +#include "fEIG.h" #include "dDiagMatrix.h" #include "dMatrix.h" #include "mx-cm-cdm.h" @@ -1260,6 +1261,1218 @@ return result; } +static inline int +xisint (float x) +{ + return (D_NINT (x) == x + && ((x >= 0 && x < INT_MAX) + || (x <= 0 && x > INT_MIN))); +} + +// Safer pow functions. +// +// op2 \ op1: s m cs cm +// +-- +---+---+----+----+ +// scalar | | 1 | 5 | 7 | 11 | +// +---+---+----+----+ +// matrix | 2 | * | 8 | * | +// +---+---+----+----+ +// complex_scalar | 3 | 6 | 9 | 12 | +// +---+---+----+----+ +// complex_matrix | 4 | * | 10 | * | +// +---+---+----+----+ + +// -*- 1 -*- +octave_value +xpow (float a, float b) +{ + float retval; + + if (a < 0.0 && static_cast (b) != b) + { + FloatComplex atmp (a); + + return std::pow (atmp, b); + } + else + retval = std::pow (a, b); + + return retval; +} + +// -*- 2 -*- +octave_value +xpow (float a, const FloatMatrix& b) +{ + octave_value retval; + + octave_idx_type nr = b.rows (); + octave_idx_type nc = b.cols (); + + if (nr == 0 || nc == 0 || nr != nc) + error ("for x^A, A must be square"); + else + { + FloatEIG b_eig (b); + + if (! 
error_state) + { + FloatComplexColumnVector lambda (b_eig.eigenvalues ()); + FloatComplexMatrix Q (b_eig.eigenvectors ()); + + for (octave_idx_type i = 0; i < nr; i++) + { + FloatComplex elt = lambda(i); + if (std::imag (elt) == 0.0) + lambda(i) = std::pow (a, std::real (elt)); + else + lambda(i) = std::pow (a, elt); + } + FloatComplexDiagMatrix D (lambda); + + retval = FloatComplexMatrix (Q * D * Q.inverse ()); + } + else + error ("xpow: matrix diagonalization failed"); + } + + return retval; +} + +// -*- 3 -*- +octave_value +xpow (float a, const FloatComplex& b) +{ + FloatComplex result; + FloatComplex atmp (a); + result = std::pow (atmp, b); + return result; +} + +// -*- 4 -*- +octave_value +xpow (float a, const FloatComplexMatrix& b) +{ + octave_value retval; + + octave_idx_type nr = b.rows (); + octave_idx_type nc = b.cols (); + + if (nr == 0 || nc == 0 || nr != nc) + error ("for x^A, A must be square"); + else + { + FloatEIG b_eig (b); + + if (! error_state) + { + FloatComplexColumnVector lambda (b_eig.eigenvalues ()); + FloatComplexMatrix Q (b_eig.eigenvectors ()); + + for (octave_idx_type i = 0; i < nr; i++) + { + FloatComplex elt = lambda(i); + if (std::imag (elt) == 0.0) + lambda(i) = std::pow (a, std::real (elt)); + else + lambda(i) = std::pow (a, elt); + } + FloatComplexDiagMatrix D (lambda); + + retval = FloatComplexMatrix (Q * D * Q.inverse ()); + } + else + error ("xpow: matrix diagonalization failed"); + } + + return retval; +} + +// -*- 5 -*- +octave_value +xpow (const FloatMatrix& a, float b) +{ + octave_value retval; + + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + if (nr == 0 || nc == 0 || nr != nc) + error ("for A^b, A must be square"); + else + { + if (static_cast (b) == b) + { + int btmp = static_cast (b); + if (btmp == 0) + { + retval = FloatDiagMatrix (nr, nr, 1.0); + } + else + { + // Too much copying? + // FIXME -- we shouldn't do this if the exponent is + // large... 
+ + FloatMatrix atmp; + if (btmp < 0) + { + btmp = -btmp; + + octave_idx_type info; + float rcond = 0.0; + MatrixType mattype (a); + + atmp = a.inverse (mattype, info, rcond, 1); + + if (info == -1) + warning ("inverse: matrix singular to machine\ + precision, rcond = %g", rcond); + } + else + atmp = a; + + FloatMatrix result (atmp); + + btmp--; + + while (btmp > 0) + { + if (btmp & 1) + result = result * atmp; + + btmp >>= 1; + + if (btmp > 0) + atmp = atmp * atmp; + } + + retval = result; + } + } + else + { + FloatEIG a_eig (a); + + if (! error_state) + { + FloatComplexColumnVector lambda (a_eig.eigenvalues ()); + FloatComplexMatrix Q (a_eig.eigenvectors ()); + + for (octave_idx_type i = 0; i < nr; i++) + lambda(i) = std::pow (lambda(i), b); + + FloatComplexDiagMatrix D (lambda); + + retval = FloatComplexMatrix (Q * D * Q.inverse ()); + } + else + error ("xpow: matrix diagonalization failed"); + } + } + + return retval; +} + +// -*- 6 -*- +octave_value +xpow (const FloatMatrix& a, const FloatComplex& b) +{ + octave_value retval; + + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + if (nr == 0 || nc == 0 || nr != nc) + error ("for A^b, A must be square"); + else + { + FloatEIG a_eig (a); + + if (! 
error_state) + { + FloatComplexColumnVector lambda (a_eig.eigenvalues ()); + FloatComplexMatrix Q (a_eig.eigenvectors ()); + + for (octave_idx_type i = 0; i < nr; i++) + lambda(i) = std::pow (lambda(i), b); + + FloatComplexDiagMatrix D (lambda); + + retval = FloatComplexMatrix (Q * D * Q.inverse ()); + } + else + error ("xpow: matrix diagonalization failed"); + } + + return retval; +} + +// -*- 7 -*- +octave_value +xpow (const FloatComplex& a, float b) +{ + FloatComplex result; + + if (xisint (b)) + result = std::pow (a, static_cast (b)); + else + result = std::pow (a, b); + + return result; +} + +// -*- 8 -*- +octave_value +xpow (const FloatComplex& a, const FloatMatrix& b) +{ + octave_value retval; + + octave_idx_type nr = b.rows (); + octave_idx_type nc = b.cols (); + + if (nr == 0 || nc == 0 || nr != nc) + error ("for x^A, A must be square"); + else + { + FloatEIG b_eig (b); + + if (! error_state) + { + FloatComplexColumnVector lambda (b_eig.eigenvalues ()); + FloatComplexMatrix Q (b_eig.eigenvectors ()); + + for (octave_idx_type i = 0; i < nr; i++) + { + FloatComplex elt = lambda(i); + if (std::imag (elt) == 0.0) + lambda(i) = std::pow (a, std::real (elt)); + else + lambda(i) = std::pow (a, elt); + } + FloatComplexDiagMatrix D (lambda); + + retval = FloatComplexMatrix (Q * D * Q.inverse ()); + } + else + error ("xpow: matrix diagonalization failed"); + } + + return retval; +} + +// -*- 9 -*- +octave_value +xpow (const FloatComplex& a, const FloatComplex& b) +{ + FloatComplex result; + result = std::pow (a, b); + return result; +} + +// -*- 10 -*- +octave_value +xpow (const FloatComplex& a, const FloatComplexMatrix& b) +{ + octave_value retval; + + octave_idx_type nr = b.rows (); + octave_idx_type nc = b.cols (); + + if (nr == 0 || nc == 0 || nr != nc) + error ("for x^A, A must be square"); + else + { + FloatEIG b_eig (b); + + if (! 
error_state) + { + FloatComplexColumnVector lambda (b_eig.eigenvalues ()); + FloatComplexMatrix Q (b_eig.eigenvectors ()); + + for (octave_idx_type i = 0; i < nr; i++) + { + FloatComplex elt = lambda(i); + if (std::imag (elt) == 0.0) + lambda(i) = std::pow (a, std::real (elt)); + else + lambda(i) = std::pow (a, elt); + } + FloatComplexDiagMatrix D (lambda); + + retval = FloatComplexMatrix (Q * D * Q.inverse ()); + } + else + error ("xpow: matrix diagonalization failed"); + } + + return retval; +} + +// -*- 11 -*- +octave_value +xpow (const FloatComplexMatrix& a, float b) +{ + octave_value retval; + + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + if (nr == 0 || nc == 0 || nr != nc) + error ("for A^b, A must be square"); + else + { + if (static_cast (b) == b) + { + int btmp = static_cast (b); + if (btmp == 0) + { + retval = FloatDiagMatrix (nr, nr, 1.0); + } + else + { + // Too much copying? + // FIXME -- we shouldn't do this if the exponent is + // large... + + FloatComplexMatrix atmp; + if (btmp < 0) + { + btmp = -btmp; + + octave_idx_type info; + float rcond = 0.0; + MatrixType mattype (a); + + atmp = a.inverse (mattype, info, rcond, 1); + + if (info == -1) + warning ("inverse: matrix singular to machine\ + precision, rcond = %g", rcond); + } + else + atmp = a; + + FloatComplexMatrix result (atmp); + + btmp--; + + while (btmp > 0) + { + if (btmp & 1) + result = result * atmp; + + btmp >>= 1; + + if (btmp > 0) + atmp = atmp * atmp; + } + + retval = result; + } + } + else + { + FloatEIG a_eig (a); + + if (! 
error_state) + { + FloatComplexColumnVector lambda (a_eig.eigenvalues ()); + FloatComplexMatrix Q (a_eig.eigenvectors ()); + + for (octave_idx_type i = 0; i < nr; i++) + lambda(i) = std::pow (lambda(i), b); + + FloatComplexDiagMatrix D (lambda); + + retval = FloatComplexMatrix (Q * D * Q.inverse ()); + } + else + error ("xpow: matrix diagonalization failed"); + } + } + + return retval; +} + +// -*- 12 -*- +octave_value +xpow (const FloatComplexMatrix& a, const FloatComplex& b) +{ + octave_value retval; + + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + if (nr == 0 || nc == 0 || nr != nc) + error ("for A^b, A must be square"); + else + { + FloatEIG a_eig (a); + + if (! error_state) + { + FloatComplexColumnVector lambda (a_eig.eigenvalues ()); + FloatComplexMatrix Q (a_eig.eigenvectors ()); + + for (octave_idx_type i = 0; i < nr; i++) + lambda(i) = std::pow (lambda(i), b); + + FloatComplexDiagMatrix D (lambda); + + retval = FloatComplexMatrix (Q * D * Q.inverse ()); + } + else + error ("xpow: matrix diagonalization failed"); + } + + return retval; +} + +// Safer pow functions that work elementwise for matrices. +// +// op2 \ op1: s m cs cm +// +-- +---+---+----+----+ +// scalar | | * | 3 | * | 9 | +// +---+---+----+----+ +// matrix | 1 | 4 | 7 | 10 | +// +---+---+----+----+ +// complex_scalar | * | 5 | * | 11 | +// +---+---+----+----+ +// complex_matrix | 2 | 6 | 8 | 12 | +// +---+---+----+----+ +// +// * -> not needed. + +// FIXME -- these functions need to be fixed so that things +// like +// +// a = -1; b = [ 0, 0.5, 1 ]; r = a .^ b +// +// and +// +// a = -1; b = [ 0, 0.5, 1 ]; for i = 1:3, r(i) = a .^ b(i), end +// +// produce identical results. Also, it would be nice if -1^0.5 +// produced a pure imaginary result instead of a complex number with a +// small real part. But perhaps that's really a problem with the math +// library... 
+ +// -*- 1 -*- +octave_value +elem_xpow (float a, const FloatMatrix& b) +{ + octave_value retval; + + octave_idx_type nr = b.rows (); + octave_idx_type nc = b.cols (); + + float d1, d2; + + if (a < 0.0 && ! b.all_integers (d1, d2)) + { + FloatComplex atmp (a); + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = std::pow (atmp, b (i, j)); + } + + retval = result; + } + else + { + FloatMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = std::pow (a, b (i, j)); + } + + retval = result; + } + + return retval; +} + +// -*- 2 -*- +octave_value +elem_xpow (float a, const FloatComplexMatrix& b) +{ + octave_idx_type nr = b.rows (); + octave_idx_type nc = b.cols (); + + FloatComplexMatrix result (nr, nc); + FloatComplex atmp (a); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = std::pow (atmp, b (i, j)); + } + + return result; +} + +// -*- 3 -*- +octave_value +elem_xpow (const FloatMatrix& a, float b) +{ + octave_value retval; + + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + if (static_cast (b) != b && a.any_element_is_negative ()) + { + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + + FloatComplex atmp (a (i, j)); + + result (i, j) = std::pow (atmp, b); + } + + retval = result; + } + else + { + FloatMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = std::pow (a (i, j), b); + } + + retval = result; + } + + return retval; +} + +// -*- 4 -*- +octave_value +elem_xpow (const FloatMatrix& a, const FloatMatrix& b) +{ + octave_value retval; + + octave_idx_type nr = a.rows (); + 
octave_idx_type nc = a.cols (); + + octave_idx_type b_nr = b.rows (); + octave_idx_type b_nc = b.cols (); + + if (nr != b_nr || nc != b_nc) + { + gripe_nonconformant ("operator .^", nr, nc, b_nr, b_nc); + return octave_value (); + } + + int convert_to_complex = 0; + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + float atmp = a (i, j); + float btmp = b (i, j); + if (atmp < 0.0 && static_cast (btmp) != btmp) + { + convert_to_complex = 1; + goto done; + } + } + +done: + + if (convert_to_complex) + { + FloatComplexMatrix complex_result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + FloatComplex atmp (a (i, j)); + FloatComplex btmp (b (i, j)); + complex_result (i, j) = std::pow (atmp, btmp); + } + + retval = complex_result; + } + else + { + FloatMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = std::pow (a (i, j), b (i, j)); + } + + retval = result; + } + + return retval; +} + +// -*- 5 -*- +octave_value +elem_xpow (const FloatMatrix& a, const FloatComplex& b) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = std::pow (FloatComplex (a (i, j)), b); + } + + return result; +} + +// -*- 6 -*- +octave_value +elem_xpow (const FloatMatrix& a, const FloatComplexMatrix& b) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + octave_idx_type b_nr = b.rows (); + octave_idx_type b_nc = b.cols (); + + if (nr != b_nr || nc != b_nc) + { + gripe_nonconformant ("operator .^", nr, nc, b_nr, b_nc); + return octave_value (); + } + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + 
OCTAVE_QUIT; + result (i, j) = std::pow (FloatComplex (a (i, j)), b (i, j)); + } + + return result; +} + +// -*- 7 -*- +octave_value +elem_xpow (const FloatComplex& a, const FloatMatrix& b) +{ + octave_idx_type nr = b.rows (); + octave_idx_type nc = b.cols (); + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + float btmp = b (i, j); + if (xisint (btmp)) + result (i, j) = std::pow (a, static_cast (btmp)); + else + result (i, j) = std::pow (a, btmp); + } + + return result; +} + +// -*- 8 -*- +octave_value +elem_xpow (const FloatComplex& a, const FloatComplexMatrix& b) +{ + octave_idx_type nr = b.rows (); + octave_idx_type nc = b.cols (); + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = std::pow (a, b (i, j)); + } + + return result; +} + +// -*- 9 -*- +octave_value +elem_xpow (const FloatComplexMatrix& a, float b) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + FloatComplexMatrix result (nr, nc); + + if (xisint (b)) + { + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = std::pow (a (i, j), static_cast (b)); + } + } + else + { + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = std::pow (a (i, j), b); + } + } + + return result; +} + +// -*- 10 -*- +octave_value +elem_xpow (const FloatComplexMatrix& a, const FloatMatrix& b) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + octave_idx_type b_nr = b.rows (); + octave_idx_type b_nc = b.cols (); + + if (nr != b_nr || nc != b_nc) + { + gripe_nonconformant ("operator .^", nr, nc, b_nr, b_nc); + return octave_value (); + } + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for 
(octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + float btmp = b (i, j); + if (xisint (btmp)) + result (i, j) = std::pow (a (i, j), static_cast (btmp)); + else + result (i, j) = std::pow (a (i, j), btmp); + } + + return result; +} + +// -*- 11 -*- +octave_value +elem_xpow (const FloatComplexMatrix& a, const FloatComplex& b) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = std::pow (a (i, j), b); + } + + return result; +} + +// -*- 12 -*- +octave_value +elem_xpow (const FloatComplexMatrix& a, const FloatComplexMatrix& b) +{ + octave_idx_type nr = a.rows (); + octave_idx_type nc = a.cols (); + + octave_idx_type b_nr = b.rows (); + octave_idx_type b_nc = b.cols (); + + if (nr != b_nr || nc != b_nc) + { + gripe_nonconformant ("operator .^", nr, nc, b_nr, b_nc); + return octave_value (); + } + + FloatComplexMatrix result (nr, nc); + + for (octave_idx_type j = 0; j < nc; j++) + for (octave_idx_type i = 0; i < nr; i++) + { + OCTAVE_QUIT; + result (i, j) = std::pow (a (i, j), b (i, j)); + } + + return result; +} + +// Safer pow functions that work elementwise for N-d arrays. +// +// op2 \ op1: s nd cs cnd +// +-- +---+---+----+----+ +// scalar | | * | 3 | * | 9 | +// +---+---+----+----+ +// N_d | 1 | 4 | 7 | 10 | +// +---+---+----+----+ +// complex_scalar | * | 5 | * | 11 | +// +---+---+----+----+ +// complex_N_d | 2 | 6 | 8 | 12 | +// +---+---+----+----+ +// +// * -> not needed. + +// FIXME -- these functions need to be fixed so that things +// like +// +// a = -1; b = [ 0, 0.5, 1 ]; r = a .^ b +// +// and +// +// a = -1; b = [ 0, 0.5, 1 ]; for i = 1:3, r(i) = a .^ b(i), end +// +// produce identical results. Also, it would be nice if -1^0.5 +// produced a pure imaginary result instead of a complex number with a +// small real part. 
But perhaps that's really a problem with the math +// library... + +// -*- 1 -*- +octave_value +elem_xpow (float a, const FloatNDArray& b) +{ + octave_value retval; + + float d1, d2; + + if (a < 0.0 && ! b.all_integers (d1, d2)) + { + FloatComplex atmp (a); + FloatComplexNDArray result (b.dims ()); + for (octave_idx_type i = 0; i < b.length (); i++) + { + OCTAVE_QUIT; + result(i) = std::pow (atmp, b(i)); + } + + retval = result; + } + else + { + FloatNDArray result (b.dims ()); + for (octave_idx_type i = 0; i < b.length (); i++) + { + OCTAVE_QUIT; + result (i) = std::pow (a, b(i)); + } + + retval = result; + } + + return retval; +} + +// -*- 2 -*- +octave_value +elem_xpow (float a, const FloatComplexNDArray& b) +{ + FloatComplexNDArray result (b.dims ()); + FloatComplex atmp (a); + + for (octave_idx_type i = 0; i < b.length (); i++) + { + OCTAVE_QUIT; + result(i) = std::pow (atmp, b(i)); + } + + return result; +} + +// -*- 3 -*- +octave_value +elem_xpow (const FloatNDArray& a, float b) +{ + octave_value retval; + + if (static_cast (b) != b && a.any_element_is_negative ()) + { + FloatComplexNDArray result (a.dims ()); + + for (octave_idx_type i = 0; i < a.length (); i++) + { + OCTAVE_QUIT; + + FloatComplex atmp (a (i)); + + result(i) = std::pow (atmp, b); + } + + retval = result; + } + else + { + FloatNDArray result (a.dims ()); + + for (octave_idx_type i = 0; i < a.length (); i++) + { + OCTAVE_QUIT; + result(i) = std::pow (a(i), b); + } + + retval = result; + } + + return retval; +} + +// -*- 4 -*- +octave_value +elem_xpow (const FloatNDArray& a, const FloatNDArray& b) +{ + octave_value retval; + + dim_vector a_dims = a.dims (); + dim_vector b_dims = b.dims (); + + if (a_dims != b_dims) + { + gripe_nonconformant ("operator .^", a_dims, b_dims); + return octave_value (); + } + + int len = a.length (); + + bool convert_to_complex = false; + + for (octave_idx_type i = 0; i < len; i++) + { + OCTAVE_QUIT; + float atmp = a(i); + float btmp = b(i); + if (atmp < 0.0 && 
static_cast (btmp) != btmp) + { + convert_to_complex = true; + goto done; + } + } + +done: + + if (convert_to_complex) + { + FloatComplexNDArray complex_result (a_dims); + + for (octave_idx_type i = 0; i < len; i++) + { + OCTAVE_QUIT; + FloatComplex atmp (a(i)); + FloatComplex btmp (b(i)); + complex_result(i) = std::pow (atmp, btmp); + } + + retval = complex_result; + } + else + { + FloatNDArray result (a_dims); + + for (octave_idx_type i = 0; i < len; i++) + { + OCTAVE_QUIT; + result(i) = std::pow (a(i), b(i)); + } + + retval = result; + } + + return retval; +} + +// -*- 5 -*- +octave_value +elem_xpow (const FloatNDArray& a, const FloatComplex& b) +{ + FloatComplexNDArray result (a.dims ()); + + for (octave_idx_type i = 0; i < a.length (); i++) + { + OCTAVE_QUIT; + result(i) = std::pow (FloatComplex (a(i)), b); + } + + return result; +} + +// -*- 6 -*- +octave_value +elem_xpow (const FloatNDArray& a, const FloatComplexNDArray& b) +{ + dim_vector a_dims = a.dims (); + dim_vector b_dims = b.dims (); + + if (a_dims != b_dims) + { + gripe_nonconformant ("operator .^", a_dims, b_dims); + return octave_value (); + } + + FloatComplexNDArray result (a_dims); + + for (octave_idx_type i = 0; i < a.length (); i++) + { + OCTAVE_QUIT; + result(i) = std::pow (FloatComplex (a(i)), b(i)); + } + + return result; +} + +// -*- 7 -*- +octave_value +elem_xpow (const FloatComplex& a, const FloatNDArray& b) +{ + FloatComplexNDArray result (b.dims ()); + + for (octave_idx_type i = 0; i < b.length (); i++) + { + OCTAVE_QUIT; + float btmp = b(i); + if (xisint (btmp)) + result(i) = std::pow (a, static_cast (btmp)); + else + result(i) = std::pow (a, btmp); + } + + return result; +} + +// -*- 8 -*- +octave_value +elem_xpow (const FloatComplex& a, const FloatComplexNDArray& b) +{ + FloatComplexNDArray result (b.dims ()); + + for (octave_idx_type i = 0; i < b.length (); i++) + { + OCTAVE_QUIT; + result(i) = std::pow (a, b(i)); + } + + return result; +} + +// -*- 9 -*- +octave_value +elem_xpow 
(const FloatComplexNDArray& a, float b) +{ + FloatComplexNDArray result (a.dims ()); + + if (xisint (b)) + { + for (octave_idx_type i = 0; i < a.length (); i++) + { + OCTAVE_QUIT; + result(i) = std::pow (a(i), static_cast (b)); + } + } + else + { + for (octave_idx_type i = 0; i < a.length (); i++) + { + OCTAVE_QUIT; + result(i) = std::pow (a(i), b); + } + } + + return result; +} + +// -*- 10 -*- +octave_value +elem_xpow (const FloatComplexNDArray& a, const FloatNDArray& b) +{ + dim_vector a_dims = a.dims (); + dim_vector b_dims = b.dims (); + + if (a_dims != b_dims) + { + gripe_nonconformant ("operator .^", a_dims, b_dims); + return octave_value (); + } + + FloatComplexNDArray result (a_dims); + + for (octave_idx_type i = 0; i < a.length (); i++) + { + OCTAVE_QUIT; + float btmp = b(i); + if (xisint (btmp)) + result(i) = std::pow (a(i), static_cast (btmp)); + else + result(i) = std::pow (a(i), btmp); + } + + return result; +} + +// -*- 11 -*- +octave_value +elem_xpow (const FloatComplexNDArray& a, const FloatComplex& b) +{ + FloatComplexNDArray result (a.dims ()); + + for (octave_idx_type i = 0; i < a.length (); i++) + { + OCTAVE_QUIT; + result(i) = std::pow (a(i), b); + } + + return result; +} + +// -*- 12 -*- +octave_value +elem_xpow (const FloatComplexNDArray& a, const FloatComplexNDArray& b) +{ + dim_vector a_dims = a.dims (); + dim_vector b_dims = b.dims (); + + if (a_dims != b_dims) + { + gripe_nonconformant ("operator .^", a_dims, b_dims); + return octave_value (); + } + + FloatComplexNDArray result (a_dims); + + for (octave_idx_type i = 0; i < a.length (); i++) + { + OCTAVE_QUIT; + result(i) = std::pow (a(i), b(i)); + } + + return result; +} + /* ;;; Local Variables: *** ;;; mode: C++ *** diff -r 45f5faba05a2 -r 82be108cc558 src/xpow.h --- a/src/xpow.h Wed May 14 18:09:56 2008 +0200 +++ b/src/xpow.h Sun Apr 27 22:34:17 2008 +0200 @@ -28,6 +28,8 @@ class Matrix; class ComplexMatrix; +class FloatMatrix; +class FloatComplexMatrix; class octave_value; extern 
octave_value xpow (double a, double b); @@ -79,6 +81,55 @@ extern octave_value elem_xpow (const ComplexNDArray& a, const Complex& b); extern octave_value elem_xpow (const ComplexNDArray& a, const ComplexNDArray& b); +extern octave_value xpow (float a, float b); +extern octave_value xpow (float a, const FloatMatrix& b); +extern octave_value xpow (float a, const FloatComplex& b); +extern octave_value xpow (float a, const FloatComplexMatrix& b); + +extern octave_value xpow (const FloatMatrix& a, float b); +extern octave_value xpow (const FloatMatrix& a, const FloatComplex& b); + +extern octave_value xpow (const FloatComplex& a, float b); +extern octave_value xpow (const FloatComplex& a, const FloatMatrix& b); +extern octave_value xpow (const FloatComplex& a, const FloatComplex& b); +extern octave_value xpow (const FloatComplex& a, const FloatComplexMatrix& b); + +extern octave_value xpow (const FloatComplexMatrix& a, float b); +extern octave_value xpow (const FloatComplexMatrix& a, const FloatComplex& b); + +extern octave_value elem_xpow (float a, const FloatMatrix& b); +extern octave_value elem_xpow (float a, const FloatComplexMatrix& b); + +extern octave_value elem_xpow (const FloatMatrix& a, float b); +extern octave_value elem_xpow (const FloatMatrix& a, const FloatMatrix& b); +extern octave_value elem_xpow (const FloatMatrix& a, const FloatComplex& b); +extern octave_value elem_xpow (const FloatMatrix& a, const FloatComplexMatrix& b); + +extern octave_value elem_xpow (const FloatComplex& a, const FloatMatrix& b); +extern octave_value elem_xpow (const FloatComplex& a, const FloatComplexMatrix& b); + +extern octave_value elem_xpow (const FloatComplexMatrix& a, float b); +extern octave_value elem_xpow (const FloatComplexMatrix& a, const FloatMatrix& b); +extern octave_value elem_xpow (const FloatComplexMatrix& a, const FloatComplex& b); +extern octave_value elem_xpow (const FloatComplexMatrix& a, const FloatComplexMatrix& b); + + +extern octave_value elem_xpow (float 
a, const FloatNDArray& b); +extern octave_value elem_xpow (float a, const FloatComplexNDArray& b); + +extern octave_value elem_xpow (const FloatNDArray& a, float b); +extern octave_value elem_xpow (const FloatNDArray& a, const FloatNDArray& b); +extern octave_value elem_xpow (const FloatNDArray& a, const FloatComplex& b); +extern octave_value elem_xpow (const FloatNDArray& a, const FloatComplexNDArray& b); + +extern octave_value elem_xpow (const FloatComplex& a, const FloatNDArray& b); +extern octave_value elem_xpow (const FloatComplex& a, const FloatComplexNDArray& b); + +extern octave_value elem_xpow (const FloatComplexNDArray& a, float b); +extern octave_value elem_xpow (const FloatComplexNDArray& a, const FloatNDArray& b); +extern octave_value elem_xpow (const FloatComplexNDArray& a, const FloatComplex& b); +extern octave_value elem_xpow (const FloatComplexNDArray& a, const FloatComplexNDArray& b); + #endif /*