comparison libcruft/lapack/cgeqp3.f @ 7789:82be108cc558

First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author David Bateman <dbateman@free.fr>
date Sun, 27 Apr 2008 22:34:17 +0200
parents
children
comparison
equal deleted inserted replaced
7788:45f5faba05a2 7789:82be108cc558
1 SUBROUTINE CGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, RWORK,
2 $ INFO )
3 *
4 * -- LAPACK routine (version 3.1) --
5 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
6 * November 2006
7 *
8 * .. Scalar Arguments ..
9 INTEGER INFO, LDA, LWORK, M, N
10 * ..
11 * .. Array Arguments ..
12 INTEGER JPVT( * )
13 REAL RWORK( * )
14 COMPLEX A( LDA, * ), TAU( * ), WORK( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * CGEQP3 computes a QR factorization with column pivoting of a
21 * matrix A: A*P = Q*R using Level 3 BLAS.
22 *
23 * Arguments
24 * =========
25 *
26 * M (input) INTEGER
27 * The number of rows of the matrix A. M >= 0.
28 *
29 * N (input) INTEGER
30 * The number of columns of the matrix A. N >= 0.
31 *
32 * A (input/output) COMPLEX array, dimension (LDA,N)
33 * On entry, the M-by-N matrix A.
34 * On exit, the upper triangle of the array contains the
35 * min(M,N)-by-N upper trapezoidal matrix R; the elements below
36 * the diagonal, together with the array TAU, represent the
37 * unitary matrix Q as a product of min(M,N) elementary
38 * reflectors.
39 *
40 * LDA (input) INTEGER
41 * The leading dimension of the array A. LDA >= max(1,M).
42 *
43 * JPVT (input/output) INTEGER array, dimension (N)
44 * On entry, if JPVT(J).ne.0, the J-th column of A is permuted
45 * to the front of A*P (a leading column); if JPVT(J)=0,
46 * the J-th column of A is a free column.
47 * On exit, if JPVT(J)=K, then the J-th column of A*P was the
48 * the K-th column of A.
49 *
50 * TAU (output) COMPLEX array, dimension (min(M,N))
51 * The scalar factors of the elementary reflectors.
52 *
53 * WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK))
54 * On exit, if INFO=0, WORK(1) returns the optimal LWORK.
55 *
56 * LWORK (input) INTEGER
57 * The dimension of the array WORK. LWORK >= N+1.
58 * For optimal performance LWORK >= ( N+1 )*NB, where NB
59 * is the optimal blocksize.
60 *
61 * If LWORK = -1, then a workspace query is assumed; the routine
62 * only calculates the optimal size of the WORK array, returns
63 * this value as the first entry of the WORK array, and no error
64 * message related to LWORK is issued by XERBLA.
65 *
66 * RWORK (workspace) REAL array, dimension (2*N)
67 *
68 * INFO (output) INTEGER
69 * = 0: successful exit.
70 * < 0: if INFO = -i, the i-th argument had an illegal value.
71 *
72 * Further Details
73 * ===============
74 *
75 * The matrix Q is represented as a product of elementary reflectors
76 *
77 * Q = H(1) H(2) . . . H(k), where k = min(m,n).
78 *
79 * Each H(i) has the form
80 *
81 * H(i) = I - tau * v * v'
82 *
83 * where tau is a real/complex scalar, and v is a real/complex vector
84 * with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in
85 * A(i+1:m,i), and tau in TAU(i).
86 *
87 * Based on contributions by
88 * G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
89 * X. Sun, Computer Science Dept., Duke University, USA
90 *
91 * =====================================================================
92 *
93 * .. Parameters ..
94 INTEGER INB, INBMIN, IXOVER
95 PARAMETER ( INB = 1, INBMIN = 2, IXOVER = 3 )
96 * ..
97 * .. Local Scalars ..
98 LOGICAL LQUERY
99 INTEGER FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB,
100 $ NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN
101 * ..
102 * .. External Subroutines ..
103 EXTERNAL CGEQRF, CLAQP2, CLAQPS, CSWAP, CUNMQR, XERBLA
104 * ..
105 * .. External Functions ..
106 INTEGER ILAENV
107 REAL SCNRM2
108 EXTERNAL ILAENV, SCNRM2
109 * ..
110 * .. Intrinsic Functions ..
111 INTRINSIC INT, MAX, MIN
112 * ..
113 * .. Executable Statements ..
114 *
115 * Test input arguments
116 * ====================
117 *
118 INFO = 0
119 LQUERY = ( LWORK.EQ.-1 )
120 IF( M.LT.0 ) THEN
121 INFO = -1
122 ELSE IF( N.LT.0 ) THEN
123 INFO = -2
124 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
125 INFO = -4
126 END IF
127 *
128 IF( INFO.EQ.0 ) THEN
129 MINMN = MIN( M, N )
130 IF( MINMN.EQ.0 ) THEN
131 IWS = 1
132 LWKOPT = 1
133 ELSE
134 IWS = N + 1
135 NB = ILAENV( INB, 'CGEQRF', ' ', M, N, -1, -1 )
136 LWKOPT = ( N + 1 )*NB
137 END IF
138 WORK( 1 ) = LWKOPT
139 *
140 IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN
141 INFO = -8
142 END IF
143 END IF
144 *
145 IF( INFO.NE.0 ) THEN
146 CALL XERBLA( 'CGEQP3', -INFO )
147 RETURN
148 ELSE IF( LQUERY ) THEN
149 RETURN
150 END IF
151 *
152 * Quick return if possible.
153 *
154 IF( MINMN.EQ.0 ) THEN
155 RETURN
156 END IF
157 *
158 * Move initial columns up front.
159 *
160 NFXD = 1
161 DO 10 J = 1, N
162 IF( JPVT( J ).NE.0 ) THEN
163 IF( J.NE.NFXD ) THEN
164 CALL CSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 )
165 JPVT( J ) = JPVT( NFXD )
166 JPVT( NFXD ) = J
167 ELSE
168 JPVT( J ) = J
169 END IF
170 NFXD = NFXD + 1
171 ELSE
172 JPVT( J ) = J
173 END IF
174 10 CONTINUE
175 NFXD = NFXD - 1
176 *
177 * Factorize fixed columns
178 * =======================
179 *
180 * Compute the QR factorization of fixed columns and update
181 * remaining columns.
182 *
183 IF( NFXD.GT.0 ) THEN
184 NA = MIN( M, NFXD )
185 *CC CALL CGEQR2( M, NA, A, LDA, TAU, WORK, INFO )
186 CALL CGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO )
187 IWS = MAX( IWS, INT( WORK( 1 ) ) )
188 IF( NA.LT.N ) THEN
189 *CC CALL CUNM2R( 'Left', 'Conjugate Transpose', M, N-NA,
190 *CC $ NA, A, LDA, TAU, A( 1, NA+1 ), LDA, WORK,
191 *CC $ INFO )
192 CALL CUNMQR( 'Left', 'Conjugate Transpose', M, N-NA, NA, A,
193 $ LDA, TAU, A( 1, NA+1 ), LDA, WORK, LWORK,
194 $ INFO )
195 IWS = MAX( IWS, INT( WORK( 1 ) ) )
196 END IF
197 END IF
198 *
199 * Factorize free columns
200 * ======================
201 *
202 IF( NFXD.LT.MINMN ) THEN
203 *
204 SM = M - NFXD
205 SN = N - NFXD
206 SMINMN = MINMN - NFXD
207 *
208 * Determine the block size.
209 *
210 NB = ILAENV( INB, 'CGEQRF', ' ', SM, SN, -1, -1 )
211 NBMIN = 2
212 NX = 0
213 *
214 IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN
215 *
216 * Determine when to cross over from blocked to unblocked code.
217 *
218 NX = MAX( 0, ILAENV( IXOVER, 'CGEQRF', ' ', SM, SN, -1,
219 $ -1 ) )
220 *
221 *
222 IF( NX.LT.SMINMN ) THEN
223 *
224 * Determine if workspace is large enough for blocked code.
225 *
226 MINWS = ( SN+1 )*NB
227 IWS = MAX( IWS, MINWS )
228 IF( LWORK.LT.MINWS ) THEN
229 *
230 * Not enough workspace to use optimal NB: Reduce NB and
231 * determine the minimum value of NB.
232 *
233 NB = LWORK / ( SN+1 )
234 NBMIN = MAX( 2, ILAENV( INBMIN, 'CGEQRF', ' ', SM, SN,
235 $ -1, -1 ) )
236 *
237 *
238 END IF
239 END IF
240 END IF
241 *
242 * Initialize partial column norms. The first N elements of work
243 * store the exact column norms.
244 *
245 DO 20 J = NFXD + 1, N
246 RWORK( J ) = SCNRM2( SM, A( NFXD+1, J ), 1 )
247 RWORK( N+J ) = RWORK( J )
248 20 CONTINUE
249 *
250 IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND.
251 $ ( NX.LT.SMINMN ) ) THEN
252 *
253 * Use blocked code initially.
254 *
255 J = NFXD + 1
256 *
257 * Compute factorization: while loop.
258 *
259 *
260 TOPBMN = MINMN - NX
261 30 CONTINUE
262 IF( J.LE.TOPBMN ) THEN
263 JB = MIN( NB, TOPBMN-J+1 )
264 *
265 * Factorize JB columns among columns J:N.
266 *
267 CALL CLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA,
268 $ JPVT( J ), TAU( J ), RWORK( J ),
269 $ RWORK( N+J ), WORK( 1 ), WORK( JB+1 ),
270 $ N-J+1 )
271 *
272 J = J + FJB
273 GO TO 30
274 END IF
275 ELSE
276 J = NFXD + 1
277 END IF
278 *
279 * Use unblocked code to factor the last or only block.
280 *
281 *
282 IF( J.LE.MINMN )
283 $ CALL CLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ),
284 $ TAU( J ), RWORK( J ), RWORK( N+J ), WORK( 1 ) )
285 *
286 END IF
287 *
288 WORK( 1 ) = IWS
289 RETURN
290 *
291 * End of CGEQP3
292 *
293 END