octave-nkf: libcruft/lapack/clalsa.f comparison

comparison libcruft/lapack/clalsa.f @ 7789:82be108cc558

First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code

author	David Bateman <dbateman@free.fr>
date	Sun, 27 Apr 2008 22:34:17 +0200
parents
children

comparison

equal deleted inserted replaced

-:45f5faba05a2
+:82be108cc558
+SUBROUTINE CLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
+$                   LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
+$                   GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK,
+$                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     November 2006
+*
+*     .. Scalar Arguments ..
+INTEGER            ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
+$                   SMLSIZ
+*     ..
+*     .. Array Arguments ..
+INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+$                   K( * ), PERM( LDGCOL, * )
+REAL               C( * ), DIFL( LDU, * ), DIFR( LDU, * ),
+$                   GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ),
+$                   S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * )
+COMPLEX            B( LDB, * ), BX( LDBX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLALSA is an itermediate step in solving the least squares problem
+*  by computing the SVD of the coefficient matrix in compact form (The
+*  singular vectors are computed as products of simple orthorgonal
+*  matrices.).
+*
+*  If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector
+*  matrix of an upper bidiagonal matrix to the right hand side; and if
+*  ICOMPQ = 1, CLALSA applies the right singular vector matrix to the
+*  right hand side. The singular vector matrices were generated in
+*  compact form by CLALSA.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ (input) INTEGER
+*         Specifies whether the left or the right singular vector
+*         matrix is involved.
+*         = 0: Left singular vector matrix
+*         = 1: Right singular vector matrix
+*
+*  SMLSIZ (input) INTEGER
+*         The maximum size of the subproblems at the bottom of the
+*         computation tree.
+*
+*  N      (input) INTEGER
+*         The row and column dimensions of the upper bidiagonal matrix.
+*
+*  NRHS   (input) INTEGER
+*         The number of columns of B and BX. NRHS must be at least 1.
+*
+*  B      (input/output) COMPLEX array, dimension ( LDB, NRHS )
+*         On input, B contains the right hand sides of the least
+*         squares problem in rows 1 through M.
+*         On output, B contains the solution X in rows 1 through N.
+*
+*  LDB    (input) INTEGER
+*         The leading dimension of B in the calling subprogram.
+*         LDB must be at least max(1,MAX( M, N ) ).
+*
+*  BX     (output) COMPLEX array, dimension ( LDBX, NRHS )
+*         On exit, the result of applying the left or right singular
+*         vector matrix to B.
+*
+*  LDBX   (input) INTEGER
+*         The leading dimension of BX.
+*
+*  U      (input) REAL array, dimension ( LDU, SMLSIZ ).
+*         On entry, U contains the left singular vector matrices of all
+*         subproblems at the bottom level.
+*
+*  LDU    (input) INTEGER, LDU = > N.
+*         The leading dimension of arrays U, VT, DIFL, DIFR,
+*         POLES, GIVNUM, and Z.
+*
+*  VT     (input) REAL array, dimension ( LDU, SMLSIZ+1 ).
+*         On entry, VT' contains the right singular vector matrices of
+*         all subproblems at the bottom level.
+*
+*  K      (input) INTEGER array, dimension ( N ).
+*
+*  DIFL   (input) REAL array, dimension ( LDU, NLVL ).
+*         where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
+*
+*  DIFR   (input) REAL array, dimension ( LDU, 2 * NLVL ).
+*         On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
+*         distances between singular values on the I-th level and
+*         singular values on the (I -1)-th level, and DIFR(*, 2 * I)
+*         record the normalizing factors of the right singular vectors
+*         matrices of subproblems on I-th level.
+*
+*  Z      (input) REAL array, dimension ( LDU, NLVL ).
+*         On entry, Z(1, I) contains the components of the deflation-
+*         adjusted updating row vector for subproblems on the I-th
+*         level.
+*
+*  POLES  (input) REAL array, dimension ( LDU, 2 * NLVL ).
+*         On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
+*         singular values involved in the secular equations on the I-th
+*         level.
+*
+*  GIVPTR (input) INTEGER array, dimension ( N ).
+*         On entry, GIVPTR( I ) records the number of Givens
+*         rotations performed on the I-th problem on the computation
+*         tree.
+*
+*  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
+*         On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
+*         locations of Givens rotations performed on the I-th level on
+*         the computation tree.
+*
+*  LDGCOL (input) INTEGER, LDGCOL = > N.
+*         The leading dimension of arrays GIVCOL and PERM.
+*
+*  PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).
+*         On entry, PERM(*, I) records permutations done on the I-th
+*         level of the computation tree.
+*
+*  GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ).
+*         On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
+*         values of Givens rotations performed on the I-th level on the
+*         computation tree.
+*
+*  C      (input) REAL array, dimension ( N ).
+*         On entry, if the I-th subproblem is not square,
+*         C( I ) contains the C-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  S      (input) REAL array, dimension ( N ).
+*         On entry, if the I-th subproblem is not square,
+*         S( I ) contains the S-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  RWORK  (workspace) REAL array, dimension at least
+*         max ( N, (SMLSZ+1)*NRHS*3 ).
+*
+*  IWORK  (workspace) INTEGER array.
+*         The dimension must be at least 3 * N
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+REAL               ZERO, ONE
+PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+INTEGER            I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL,
+$                   JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML,
+$                   NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE
+*     ..
+*     .. External Subroutines ..
+EXTERNAL           CCOPY, CLALS0, SGEMM, SLASDT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+INTRINSIC          AIMAG, CMPLX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+INFO = 0
+*
+IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+INFO = -1
+ELSE IF( SMLSIZ.LT.3 ) THEN
+INFO = -2
+ELSE IF( N.LT.SMLSIZ ) THEN
+INFO = -3
+ELSE IF( NRHS.LT.1 ) THEN
+INFO = -4
+ELSE IF( LDB.LT.N ) THEN
+INFO = -6
+ELSE IF( LDBX.LT.N ) THEN
+INFO = -8
+ELSE IF( LDU.LT.N ) THEN
+INFO = -10
+ELSE IF( LDGCOL.LT.N ) THEN
+INFO = -19
+END IF
+IF( INFO.NE.0 ) THEN
+CALL XERBLA( 'CLALSA', -INFO )
+RETURN
+END IF
+*
+*     Book-keeping and  setting up the computation tree.
+*
+INODE = 1
+NDIML = INODE + N
+NDIMR = NDIML + N
+*
+CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
+$             IWORK( NDIMR ), SMLSIZ )
+*
+*     The following code applies back the left singular vector factors.
+*     For applying back the right singular vector factors, go to 170.
+*
+IF( ICOMPQ.EQ.1 ) THEN
+GO TO 170
+END IF
+*
+*     The nodes on the bottom level of the tree were solved
+*     by SLASDQ. The corresponding left and right singular vector
+*     matrices are in explicit form. First apply back the left
+*     singular vector matrices.
+*
+NDB1 = ( ND+1 ) / 2
+DO 130 I = NDB1, ND
+*
+*        IC : center row of each node
+*        NL : number of rows of left  subproblem
+*        NR : number of rows of right subproblem
+*        NLF: starting row of the left   subproblem
+*        NRF: starting row of the right  subproblem
+*
+I1 = I - 1
+IC = IWORK( INODE+I1 )
+NL = IWORK( NDIML+I1 )
+NR = IWORK( NDIMR+I1 )
+NLF = IC - NL
+NRF = IC + 1
+*
+*        Since B and BX are complex, the following call to SGEMM
+*        is performed in two steps (real and imaginary parts).
+*
+*        CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+*     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+*
+J = NL*NRHS*2
+DO 20 JCOL = 1, NRHS
+DO 10 JROW = NLF, NLF + NL - 1
+J = J + 1
+RWORK( J ) = REAL( B( JROW, JCOL ) )
+10       CONTINUE
+20    CONTINUE
+CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+$               RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL )
+J = NL*NRHS*2
+DO 40 JCOL = 1, NRHS
+DO 30 JROW = NLF, NLF + NL - 1
+J = J + 1
+RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+30       CONTINUE
+40    CONTINUE
+CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+$               RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ),
+$               NL )
+JREAL = 0
+JIMAG = NL*NRHS
+DO 60 JCOL = 1, NRHS
+DO 50 JROW = NLF, NLF + NL - 1
+JREAL = JREAL + 1
+JIMAG = JIMAG + 1
+BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
+$                            RWORK( JIMAG ) )
+50       CONTINUE
+60    CONTINUE
+*
+*        Since B and BX are complex, the following call to SGEMM
+*        is performed in two steps (real and imaginary parts).
+*
+*        CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+*    $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+*
+J = NR*NRHS*2
+DO 80 JCOL = 1, NRHS
+DO 70 JROW = NRF, NRF + NR - 1
+J = J + 1
+RWORK( J ) = REAL( B( JROW, JCOL ) )
+70       CONTINUE
+80    CONTINUE
+CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+$               RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR )
+J = NR*NRHS*2
+DO 100 JCOL = 1, NRHS
+DO 90 JROW = NRF, NRF + NR - 1
+J = J + 1
+RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+90       CONTINUE
+100    CONTINUE
+CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+$               RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ),
+$               NR )
+JREAL = 0
+JIMAG = NR*NRHS
+DO 120 JCOL = 1, NRHS
+DO 110 JROW = NRF, NRF + NR - 1
+JREAL = JREAL + 1
+JIMAG = JIMAG + 1
+BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
+$                            RWORK( JIMAG ) )
+110       CONTINUE
+120    CONTINUE
+*
+130 CONTINUE
+*
+*     Next copy the rows of B that correspond to unchanged rows
+*     in the bidiagonal matrix to BX.
+*
+DO 140 I = 1, ND
+IC = IWORK( INODE+I-1 )
+CALL CCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )
+140 CONTINUE
+*
+*     Finally go through the left singular vector matrices of all
+*     the other subproblems bottom-up on the tree.
+*
+J = 2**NLVL
+SQRE = 0
+*
+DO 160 LVL = NLVL, 1, -1
+LVL2 = 2*LVL - 1
+*
+*        find the first node LF and last node LL on
+*        the current level LVL
+*
+IF( LVL.EQ.1 ) THEN
+LF = 1
+LL = 1
+ELSE
+LF = 2**( LVL-1 )
+LL = 2*LF - 1
+END IF
+DO 150 I = LF, LL
+IM1 = I - 1
+IC = IWORK( INODE+IM1 )
+NL = IWORK( NDIML+IM1 )
+NR = IWORK( NDIMR+IM1 )
+NLF = IC - NL
+NRF = IC + 1
+J = J - 1
+CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,
+$                   B( NLF, 1 ), LDB, PERM( NLF, LVL ),
+$                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+$                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+$                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+$                   Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
+$                   INFO )
+150    CONTINUE
+160 CONTINUE
+GO TO 330
+*
+*     ICOMPQ = 1: applying back the right singular vector factors.
+*
+170 CONTINUE
+*
+*     First now go through the right singular vector matrices of all
+*     the tree nodes top-down.
+*
+J = 0
+DO 190 LVL = 1, NLVL
+LVL2 = 2*LVL - 1
+*
+*        Find the first node LF and last node LL on
+*        the current level LVL.
+*
+IF( LVL.EQ.1 ) THEN
+LF = 1
+LL = 1
+ELSE
+LF = 2**( LVL-1 )
+LL = 2*LF - 1
+END IF
+DO 180 I = LL, LF, -1
+IM1 = I - 1
+IC = IWORK( INODE+IM1 )
+NL = IWORK( NDIML+IM1 )
+NR = IWORK( NDIMR+IM1 )
+NLF = IC - NL
+NRF = IC + 1
+IF( I.EQ.LL ) THEN
+SQRE = 0
+ELSE
+SQRE = 1
+END IF
+J = J + 1
+CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,
+$                   BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),
+$                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+$                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+$                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+$                   Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
+$                   INFO )
+180    CONTINUE
+190 CONTINUE
+*
+*     The nodes on the bottom level of the tree were solved
+*     by SLASDQ. The corresponding right singular vector
+*     matrices are in explicit form. Apply them back.
+*
+NDB1 = ( ND+1 ) / 2
+DO 320 I = NDB1, ND
+I1 = I - 1
+IC = IWORK( INODE+I1 )
+NL = IWORK( NDIML+I1 )
+NR = IWORK( NDIMR+I1 )
+NLP1 = NL + 1
+IF( I.EQ.ND ) THEN
+NRP1 = NR
+ELSE
+NRP1 = NR + 1
+END IF
+NLF = IC - NL
+NRF = IC + 1
+*
+*        Since B and BX are complex, the following call to SGEMM is
+*        performed in two steps (real and imaginary parts).
+*
+*        CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+*    $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+*
+J = NLP1*NRHS*2
+DO 210 JCOL = 1, NRHS
+DO 200 JROW = NLF, NLF + NLP1 - 1
+J = J + 1
+RWORK( J ) = REAL( B( JROW, JCOL ) )
+200       CONTINUE
+210    CONTINUE
+CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+$               RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ),
+$               NLP1 )
+J = NLP1*NRHS*2
+DO 230 JCOL = 1, NRHS
+DO 220 JROW = NLF, NLF + NLP1 - 1
+J = J + 1
+RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+220       CONTINUE
+230    CONTINUE
+CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+$               RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO,
+$               RWORK( 1+NLP1*NRHS ), NLP1 )
+JREAL = 0
+JIMAG = NLP1*NRHS
+DO 250 JCOL = 1, NRHS
+DO 240 JROW = NLF, NLF + NLP1 - 1
+JREAL = JREAL + 1
+JIMAG = JIMAG + 1
+BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
+$                            RWORK( JIMAG ) )
+240       CONTINUE
+250    CONTINUE
+*
+*        Since B and BX are complex, the following call to SGEMM is
+*        performed in two steps (real and imaginary parts).
+*
+*        CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+*    $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+*
+J = NRP1*NRHS*2
+DO 270 JCOL = 1, NRHS
+DO 260 JROW = NRF, NRF + NRP1 - 1
+J = J + 1
+RWORK( J ) = REAL( B( JROW, JCOL ) )
+260       CONTINUE
+270    CONTINUE
+CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+$               RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ),
+$               NRP1 )
+J = NRP1*NRHS*2
+DO 290 JCOL = 1, NRHS
+DO 280 JROW = NRF, NRF + NRP1 - 1
+J = J + 1
+RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+280       CONTINUE
+290    CONTINUE
+CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+$               RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO,
+$               RWORK( 1+NRP1*NRHS ), NRP1 )
+JREAL = 0
+JIMAG = NRP1*NRHS
+DO 310 JCOL = 1, NRHS
+DO 300 JROW = NRF, NRF + NRP1 - 1
+JREAL = JREAL + 1
+JIMAG = JIMAG + 1
+BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
+$                            RWORK( JIMAG ) )
+300       CONTINUE
+310    CONTINUE
+*
+320 CONTINUE
+*
+330 CONTINUE
+*
+RETURN
+*
+*     End of CLALSA
+*
+END

Mercurial > octave-nkf

comparison libcruft/lapack/clalsa.f @ 7789:82be108cc558