diff libcruft/lapack/cgehrd.f @ 7789:82be108cc558

First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author David Bateman <dbateman@free.fr>
date Sun, 27 Apr 2008 22:34:17 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcruft/lapack/cgehrd.f	Sun Apr 27 22:34:17 2008 +0200
@@ -0,0 +1,273 @@
+      SUBROUTINE CGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     November 2006
+*
+*     .. Scalar Arguments ..
+      INTEGER            IHI, ILO, INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEHRD reduces a complex general matrix A to upper Hessenberg form H by
+*  an unitary similarity transformation:  Q' * A * Q = H .
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows
+*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+*          set by a previous call to CGEBAL; otherwise they should be
+*          set to 1 and N respectively. See Further Details.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the N-by-N general matrix to be reduced.
+*          On exit, the upper triangle and the first subdiagonal of A
+*          are overwritten with the upper Hessenberg matrix H, and the
+*          elements below the first subdiagonal, with the array TAU,
+*          represent the unitary matrix Q as a product of elementary
+*          reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  TAU     (output) COMPLEX array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
+*          zero.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of (ihi-ilo) elementary
+*  reflectors
+*
+*     Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+*  exit in A(i+2:ihi,i), and tau in TAU(i).
+*
+*  The contents of A are illustrated by the following example, with
+*  n = 7, ilo = 2 and ihi = 6:
+*
+*  on entry,                        on exit,
+*
+*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+*  (                         a )    (                          a )
+*
+*  where a denotes an element of the original matrix A, h denotes a
+*  modified element of the upper Hessenberg matrix H, and vi denotes an
+*  element of the vector defining H(i).
+*
+*  This file is a slight modification of LAPACK-3.0's CGEHRD
+*  subroutine incorporating improvements proposed by Quintana-Orti and
+*  Van de Geijn (2005). 
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+      COMPLEX            ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ), 
+     $                     ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NH, NX
+      COMPLEX            EI
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            T( LDT, NBMAX )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CGEHD2, CGEMM, CLAHR2, CLARFB, CTRMM,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -2
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEHRD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero
+*
+      DO 10 I = 1, ILO - 1
+         TAU( I ) = ZERO
+   10 CONTINUE
+      DO 20 I = MAX( 1, IHI ), N - 1
+         TAU( I ) = ZERO
+   20 CONTINUE
+*
+*     Quick return if possible
+*
+      NH = IHI - ILO + 1
+      IF( NH.LE.1 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Determine the block size
+*
+      NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) )
+      NBMIN = 2
+      IWS = 1
+      IF( NB.GT.1 .AND. NB.LT.NH ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code
+*        (last block is always handled by unblocked code)
+*
+         NX = MAX( NB, ILAENV( 3, 'CGEHRD', ' ', N, ILO, IHI, -1 ) )
+         IF( NX.LT.NH ) THEN
+*
+*           Determine if workspace is large enough for blocked code
+*
+            IWS = N*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  determine the
+*              minimum value of NB, and reduce NB or force use of
+*              unblocked code
+*
+               NBMIN = MAX( 2, ILAENV( 2, 'CGEHRD', ' ', N, ILO, IHI,
+     $                 -1 ) )
+               IF( LWORK.GE.N*NBMIN ) THEN
+                  NB = LWORK / N
+               ELSE
+                  NB = 1
+               END IF
+            END IF
+         END IF
+      END IF
+      LDWORK = N
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN
+*
+*        Use unblocked code below
+*
+         I = ILO
+*
+      ELSE
+*
+*        Use blocked code
+*
+         DO 40 I = ILO, IHI - 1 - NX, NB
+            IB = MIN( NB, IHI-I )
+*
+*           Reduce columns i:i+ib-1 to Hessenberg form, returning the
+*           matrices V and T of the block reflector H = I - V*T*V'
+*           which performs the reduction, and also the matrix Y = A*V*T
+*
+            CALL CLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT,
+     $                   WORK, LDWORK )
+*
+*           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
+*           right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set
+*           to 1
+*
+            EI = A( I+IB, I+IB-1 )
+            A( I+IB, I+IB-1 ) = ONE
+            CALL CGEMM( 'No transpose', 'Conjugate transpose', 
+     $                  IHI, IHI-I-IB+1,
+     $                  IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE,
+     $                  A( 1, I+IB ), LDA )
+            A( I+IB, I+IB-1 ) = EI
+*
+*           Apply the block reflector H to A(1:i,i+1:i+ib-1) from the
+*           right
+*
+            CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose',
+     $                  'Unit', I, IB-1,
+     $                  ONE, A( I+1, I ), LDA, WORK, LDWORK )
+            DO 30 J = 0, IB-2
+               CALL CAXPY( I, -ONE, WORK( LDWORK*J+1 ), 1,
+     $                     A( 1, I+J+1 ), 1 )
+   30       CONTINUE
+*
+*           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
+*           left
+*
+            CALL CLARFB( 'Left', 'Conjugate transpose', 'Forward',
+     $                   'Columnwise',
+     $                   IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT,
+     $                   A( I+1, I+IB ), LDA, WORK, LDWORK )
+   40    CONTINUE
+      END IF
+*
+*     Use unblocked code to reduce the rest of the matrix
+*
+      CALL CGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO )
+      WORK( 1 ) = IWS
+*
+      RETURN
+*
+*     End of CGEHRD
+*
+      END