diff libcruft/lapack/clalsa.f @ 7789:82be108cc558

First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author David Bateman <dbateman@free.fr>
date Sun, 27 Apr 2008 22:34:17 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcruft/lapack/clalsa.f	Sun Apr 27 22:34:17 2008 +0200
@@ -0,0 +1,503 @@
+      SUBROUTINE CLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
+     $                   LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
+     $                   GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     November 2006
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
+     $                   SMLSIZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+     $                   K( * ), PERM( LDGCOL, * )
+      REAL               C( * ), DIFL( LDU, * ), DIFR( LDU, * ),
+     $                   GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ),
+     $                   S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * )
+      COMPLEX            B( LDB, * ), BX( LDBX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLALSA is an itermediate step in solving the least squares problem
+*  by computing the SVD of the coefficient matrix in compact form (The
+*  singular vectors are computed as products of simple orthorgonal
+*  matrices.).
+*
+*  If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector
+*  matrix of an upper bidiagonal matrix to the right hand side; and if
+*  ICOMPQ = 1, CLALSA applies the right singular vector matrix to the
+*  right hand side. The singular vector matrices were generated in
+*  compact form by CLALSA.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ (input) INTEGER
+*         Specifies whether the left or the right singular vector
+*         matrix is involved.
+*         = 0: Left singular vector matrix
+*         = 1: Right singular vector matrix
+*
+*  SMLSIZ (input) INTEGER
+*         The maximum size of the subproblems at the bottom of the
+*         computation tree.
+*
+*  N      (input) INTEGER
+*         The row and column dimensions of the upper bidiagonal matrix.
+*
+*  NRHS   (input) INTEGER
+*         The number of columns of B and BX. NRHS must be at least 1.
+*
+*  B      (input/output) COMPLEX array, dimension ( LDB, NRHS )
+*         On input, B contains the right hand sides of the least
+*         squares problem in rows 1 through M.
+*         On output, B contains the solution X in rows 1 through N.
+*
+*  LDB    (input) INTEGER
+*         The leading dimension of B in the calling subprogram.
+*         LDB must be at least max(1,MAX( M, N ) ).
+*
+*  BX     (output) COMPLEX array, dimension ( LDBX, NRHS )
+*         On exit, the result of applying the left or right singular
+*         vector matrix to B.
+*
+*  LDBX   (input) INTEGER
+*         The leading dimension of BX.
+*
+*  U      (input) REAL array, dimension ( LDU, SMLSIZ ).
+*         On entry, U contains the left singular vector matrices of all
+*         subproblems at the bottom level.
+*
+*  LDU    (input) INTEGER, LDU = > N.
+*         The leading dimension of arrays U, VT, DIFL, DIFR,
+*         POLES, GIVNUM, and Z.
+*
+*  VT     (input) REAL array, dimension ( LDU, SMLSIZ+1 ).
+*         On entry, VT' contains the right singular vector matrices of
+*         all subproblems at the bottom level.
+*
+*  K      (input) INTEGER array, dimension ( N ).
+*
+*  DIFL   (input) REAL array, dimension ( LDU, NLVL ).
+*         where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
+*
+*  DIFR   (input) REAL array, dimension ( LDU, 2 * NLVL ).
+*         On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
+*         distances between singular values on the I-th level and
+*         singular values on the (I -1)-th level, and DIFR(*, 2 * I)
+*         record the normalizing factors of the right singular vectors
+*         matrices of subproblems on I-th level.
+*
+*  Z      (input) REAL array, dimension ( LDU, NLVL ).
+*         On entry, Z(1, I) contains the components of the deflation-
+*         adjusted updating row vector for subproblems on the I-th
+*         level.
+*
+*  POLES  (input) REAL array, dimension ( LDU, 2 * NLVL ).
+*         On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
+*         singular values involved in the secular equations on the I-th
+*         level.
+*
+*  GIVPTR (input) INTEGER array, dimension ( N ).
+*         On entry, GIVPTR( I ) records the number of Givens
+*         rotations performed on the I-th problem on the computation
+*         tree.
+*
+*  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
+*         On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
+*         locations of Givens rotations performed on the I-th level on
+*         the computation tree.
+*
+*  LDGCOL (input) INTEGER, LDGCOL = > N.
+*         The leading dimension of arrays GIVCOL and PERM.
+*
+*  PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).
+*         On entry, PERM(*, I) records permutations done on the I-th
+*         level of the computation tree.
+*
+*  GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ).
+*         On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
+*         values of Givens rotations performed on the I-th level on the
+*         computation tree.
+*
+*  C      (input) REAL array, dimension ( N ).
+*         On entry, if the I-th subproblem is not square,
+*         C( I ) contains the C-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  S      (input) REAL array, dimension ( N ).
+*         On entry, if the I-th subproblem is not square,
+*         S( I ) contains the S-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  RWORK  (workspace) REAL array, dimension at least
+*         max ( N, (SMLSZ+1)*NRHS*3 ).
+*
+*  IWORK  (workspace) INTEGER array.
+*         The dimension must be at least 3 * N
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL,
+     $                   JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML,
+     $                   NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CLALS0, SGEMM, SLASDT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          AIMAG, CMPLX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( SMLSIZ.LT.3 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.SMLSIZ ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -6
+      ELSE IF( LDBX.LT.N ) THEN
+         INFO = -8
+      ELSE IF( LDU.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLALSA', -INFO )
+         RETURN
+      END IF
+*
+*     Book-keeping and  setting up the computation tree.
+*
+      INODE = 1
+      NDIML = INODE + N
+      NDIMR = NDIML + N
+*
+      CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
+     $             IWORK( NDIMR ), SMLSIZ )
+*
+*     The following code applies back the left singular vector factors.
+*     For applying back the right singular vector factors, go to 170.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         GO TO 170
+      END IF
+*
+*     The nodes on the bottom level of the tree were solved
+*     by SLASDQ. The corresponding left and right singular vector
+*     matrices are in explicit form. First apply back the left
+*     singular vector matrices.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 130 I = NDB1, ND
+*
+*        IC : center row of each node
+*        NL : number of rows of left  subproblem
+*        NR : number of rows of right subproblem
+*        NLF: starting row of the left   subproblem
+*        NRF: starting row of the right  subproblem
+*
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NR = IWORK( NDIMR+I1 )
+         NLF = IC - NL
+         NRF = IC + 1
+*
+*        Since B and BX are complex, the following call to SGEMM
+*        is performed in two steps (real and imaginary parts).
+*
+*        CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+*     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+*
+         J = NL*NRHS*2
+         DO 20 JCOL = 1, NRHS
+            DO 10 JROW = NLF, NLF + NL - 1
+               J = J + 1
+               RWORK( J ) = REAL( B( JROW, JCOL ) )
+   10       CONTINUE
+   20    CONTINUE
+         CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+     $               RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL )
+         J = NL*NRHS*2
+         DO 40 JCOL = 1, NRHS
+            DO 30 JROW = NLF, NLF + NL - 1
+               J = J + 1
+               RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+   30       CONTINUE
+   40    CONTINUE
+         CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+     $               RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ),
+     $               NL )
+         JREAL = 0
+         JIMAG = NL*NRHS
+         DO 60 JCOL = 1, NRHS
+            DO 50 JROW = NLF, NLF + NL - 1
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
+     $                            RWORK( JIMAG ) )
+   50       CONTINUE
+   60    CONTINUE
+*
+*        Since B and BX are complex, the following call to SGEMM
+*        is performed in two steps (real and imaginary parts).
+*
+*        CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+*    $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+*
+         J = NR*NRHS*2
+         DO 80 JCOL = 1, NRHS
+            DO 70 JROW = NRF, NRF + NR - 1
+               J = J + 1
+               RWORK( J ) = REAL( B( JROW, JCOL ) )
+   70       CONTINUE
+   80    CONTINUE
+         CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+     $               RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR )
+         J = NR*NRHS*2
+         DO 100 JCOL = 1, NRHS
+            DO 90 JROW = NRF, NRF + NR - 1
+               J = J + 1
+               RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+   90       CONTINUE
+  100    CONTINUE
+         CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+     $               RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ),
+     $               NR )
+         JREAL = 0
+         JIMAG = NR*NRHS
+         DO 120 JCOL = 1, NRHS
+            DO 110 JROW = NRF, NRF + NR - 1
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
+     $                            RWORK( JIMAG ) )
+  110       CONTINUE
+  120    CONTINUE
+*
+  130 CONTINUE
+*
+*     Next copy the rows of B that correspond to unchanged rows
+*     in the bidiagonal matrix to BX.
+*
+      DO 140 I = 1, ND
+         IC = IWORK( INODE+I-1 )
+         CALL CCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )
+  140 CONTINUE
+*
+*     Finally go through the left singular vector matrices of all
+*     the other subproblems bottom-up on the tree.
+*
+      J = 2**NLVL
+      SQRE = 0
+*
+      DO 160 LVL = NLVL, 1, -1
+         LVL2 = 2*LVL - 1
+*
+*        find the first node LF and last node LL on
+*        the current level LVL
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 150 I = LF, LL
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            J = J - 1
+            CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,
+     $                   B( NLF, 1 ), LDB, PERM( NLF, LVL ),
+     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
+     $                   INFO )
+  150    CONTINUE
+  160 CONTINUE
+      GO TO 330
+*
+*     ICOMPQ = 1: applying back the right singular vector factors.
+*
+  170 CONTINUE
+*
+*     First now go through the right singular vector matrices of all
+*     the tree nodes top-down.
+*
+      J = 0
+      DO 190 LVL = 1, NLVL
+         LVL2 = 2*LVL - 1
+*
+*        Find the first node LF and last node LL on
+*        the current level LVL.
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 180 I = LL, LF, -1
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            IF( I.EQ.LL ) THEN
+               SQRE = 0
+            ELSE
+               SQRE = 1
+            END IF
+            J = J + 1
+            CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,
+     $                   BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),
+     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
+     $                   INFO )
+  180    CONTINUE
+  190 CONTINUE
+*
+*     The nodes on the bottom level of the tree were solved
+*     by SLASDQ. The corresponding right singular vector
+*     matrices are in explicit form. Apply them back.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 320 I = NDB1, ND
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NR = IWORK( NDIMR+I1 )
+         NLP1 = NL + 1
+         IF( I.EQ.ND ) THEN
+            NRP1 = NR
+         ELSE
+            NRP1 = NR + 1
+         END IF
+         NLF = IC - NL
+         NRF = IC + 1
+*
+*        Since B and BX are complex, the following call to SGEMM is
+*        performed in two steps (real and imaginary parts).
+*
+*        CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+*    $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+*
+         J = NLP1*NRHS*2
+         DO 210 JCOL = 1, NRHS
+            DO 200 JROW = NLF, NLF + NLP1 - 1
+               J = J + 1
+               RWORK( J ) = REAL( B( JROW, JCOL ) )
+  200       CONTINUE
+  210    CONTINUE
+         CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+     $               RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ),
+     $               NLP1 )
+         J = NLP1*NRHS*2
+         DO 230 JCOL = 1, NRHS
+            DO 220 JROW = NLF, NLF + NLP1 - 1
+               J = J + 1
+               RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+  220       CONTINUE
+  230    CONTINUE
+         CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+     $               RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO,
+     $               RWORK( 1+NLP1*NRHS ), NLP1 )
+         JREAL = 0
+         JIMAG = NLP1*NRHS
+         DO 250 JCOL = 1, NRHS
+            DO 240 JROW = NLF, NLF + NLP1 - 1
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
+     $                            RWORK( JIMAG ) )
+  240       CONTINUE
+  250    CONTINUE
+*
+*        Since B and BX are complex, the following call to SGEMM is
+*        performed in two steps (real and imaginary parts).
+*
+*        CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+*    $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+*
+         J = NRP1*NRHS*2
+         DO 270 JCOL = 1, NRHS
+            DO 260 JROW = NRF, NRF + NRP1 - 1
+               J = J + 1
+               RWORK( J ) = REAL( B( JROW, JCOL ) )
+  260       CONTINUE
+  270    CONTINUE
+         CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+     $               RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ),
+     $               NRP1 )
+         J = NRP1*NRHS*2
+         DO 290 JCOL = 1, NRHS
+            DO 280 JROW = NRF, NRF + NRP1 - 1
+               J = J + 1
+               RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+  280       CONTINUE
+  290    CONTINUE
+         CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+     $               RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO,
+     $               RWORK( 1+NRP1*NRHS ), NRP1 )
+         JREAL = 0
+         JIMAG = NRP1*NRHS
+         DO 310 JCOL = 1, NRHS
+            DO 300 JROW = NRF, NRF + NRP1 - 1
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
+     $                            RWORK( JIMAG ) )
+  300       CONTINUE
+  310    CONTINUE
+*
+  320 CONTINUE
+*
+  330 CONTINUE
+*
+      RETURN
+*
+*     End of CLALSA
+*
+      END