diff libcruft/lapack/cgeqp3.f @ 7789:82be108cc558

First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author David Bateman <dbateman@free.fr>
date Sun, 27 Apr 2008 22:34:17 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcruft/lapack/cgeqp3.f	Sun Apr 27 22:34:17 2008 +0200
@@ -0,0 +1,293 @@
+      SUBROUTINE CGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     November 2006
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEQP3 computes a QR factorization with column pivoting of a
+*  matrix A:  A*P = Q*R  using Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the upper triangle of the array contains the
+*          min(M,N)-by-N upper trapezoidal matrix R; the elements below
+*          the diagonal, together with the array TAU, represent the
+*          unitary matrix Q as a product of min(M,N) elementary
+*          reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(J).ne.0, the J-th column of A is permuted
+*          to the front of A*P (a leading column); if JPVT(J)=0,
+*          the J-th column of A is a free column.
+*          On exit, if JPVT(J)=K, then the J-th column of A*P was the
+*          the K-th column of A.
+*
+*  TAU     (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (MAX(1,LWORK))
+*          On exit, if INFO=0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= N+1.
+*          For optimal performance LWORK >= ( N+1 )*NB, where NB
+*          is the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit.
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real/complex scalar, and v is a real/complex vector
+*  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in
+*  A(i+1:m,i), and tau in TAU(i).
+*
+*  Based on contributions by
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    X. Sun, Computer Science Dept., Duke University, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            INB, INBMIN, IXOVER
+      PARAMETER          ( INB = 1, INBMIN = 2, IXOVER = 3 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB,
+     $                   NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQRF, CLAQP2, CLAQPS, CSWAP, CUNMQR, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      REAL               SCNRM2
+      EXTERNAL           ILAENV, SCNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test input arguments
+*     ====================
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         MINMN = MIN( M, N )
+         IF( MINMN.EQ.0 ) THEN
+            IWS = 1
+            LWKOPT = 1
+         ELSE
+            IWS = N + 1
+            NB = ILAENV( INB, 'CGEQRF', ' ', M, N, -1, -1 )
+            LWKOPT = ( N + 1 )*NB
+         END IF
+         WORK( 1 ) = LWKOPT
+*
+         IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN
+            INFO = -8
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEQP3', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( MINMN.EQ.0 ) THEN
+         RETURN
+      END IF
+*
+*     Move initial columns up front.
+*
+      NFXD = 1
+      DO 10 J = 1, N
+         IF( JPVT( J ).NE.0 ) THEN
+            IF( J.NE.NFXD ) THEN
+               CALL CSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 )
+               JPVT( J ) = JPVT( NFXD )
+               JPVT( NFXD ) = J
+            ELSE
+               JPVT( J ) = J
+            END IF
+            NFXD = NFXD + 1
+         ELSE
+            JPVT( J ) = J
+         END IF
+   10 CONTINUE
+      NFXD = NFXD - 1
+*
+*     Factorize fixed columns
+*     =======================
+*
+*     Compute the QR factorization of fixed columns and update
+*     remaining columns.
+*
+      IF( NFXD.GT.0 ) THEN
+         NA = MIN( M, NFXD )
+*CC      CALL CGEQR2( M, NA, A, LDA, TAU, WORK, INFO )
+         CALL CGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO )
+         IWS = MAX( IWS, INT( WORK( 1 ) ) )
+         IF( NA.LT.N ) THEN
+*CC         CALL CUNM2R( 'Left', 'Conjugate Transpose', M, N-NA,
+*CC  $                   NA, A, LDA, TAU, A( 1, NA+1 ), LDA, WORK,
+*CC  $                   INFO )
+            CALL CUNMQR( 'Left', 'Conjugate Transpose', M, N-NA, NA, A,
+     $                   LDA, TAU, A( 1, NA+1 ), LDA, WORK, LWORK,
+     $                   INFO )
+            IWS = MAX( IWS, INT( WORK( 1 ) ) )
+         END IF
+      END IF
+*
+*     Factorize free columns
+*     ======================
+*
+      IF( NFXD.LT.MINMN ) THEN
+*
+         SM = M - NFXD
+         SN = N - NFXD
+         SMINMN = MINMN - NFXD
+*
+*        Determine the block size.
+*
+         NB = ILAENV( INB, 'CGEQRF', ' ', SM, SN, -1, -1 )
+         NBMIN = 2
+         NX = 0
+*
+         IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN
+*
+*           Determine when to cross over from blocked to unblocked code.
+*
+            NX = MAX( 0, ILAENV( IXOVER, 'CGEQRF', ' ', SM, SN, -1,
+     $           -1 ) )
+*
+*
+            IF( NX.LT.SMINMN ) THEN
+*
+*              Determine if workspace is large enough for blocked code.
+*
+               MINWS = ( SN+1 )*NB
+               IWS = MAX( IWS, MINWS )
+               IF( LWORK.LT.MINWS ) THEN
+*
+*                 Not enough workspace to use optimal NB: Reduce NB and
+*                 determine the minimum value of NB.
+*
+                  NB = LWORK / ( SN+1 )
+                  NBMIN = MAX( 2, ILAENV( INBMIN, 'CGEQRF', ' ', SM, SN,
+     $                    -1, -1 ) )
+*
+*
+               END IF
+            END IF
+         END IF
+*
+*        Initialize partial column norms. The first N elements of work
+*        store the exact column norms.
+*
+         DO 20 J = NFXD + 1, N
+            RWORK( J ) = SCNRM2( SM, A( NFXD+1, J ), 1 )
+            RWORK( N+J ) = RWORK( J )
+   20    CONTINUE
+*
+         IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND.
+     $       ( NX.LT.SMINMN ) ) THEN
+*
+*           Use blocked code initially.
+*
+            J = NFXD + 1
+*
+*           Compute factorization: while loop.
+*
+*
+            TOPBMN = MINMN - NX
+   30       CONTINUE
+            IF( J.LE.TOPBMN ) THEN
+               JB = MIN( NB, TOPBMN-J+1 )
+*
+*              Factorize JB columns among columns J:N.
+*
+               CALL CLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA,
+     $                      JPVT( J ), TAU( J ), RWORK( J ),
+     $                      RWORK( N+J ), WORK( 1 ), WORK( JB+1 ),
+     $                      N-J+1 )
+*
+               J = J + FJB
+               GO TO 30
+            END IF
+         ELSE
+            J = NFXD + 1
+         END IF
+*
+*        Use unblocked code to factor the last or only block.
+*
+*
+         IF( J.LE.MINMN )
+     $      CALL CLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ),
+     $                   TAU( J ), RWORK( J ), RWORK( N+J ), WORK( 1 ) )
+*
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of CGEQP3
+*
+      END