2329
|
1 SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, |
|
2 $ WORK, LWORK, INFO ) |
|
3 * |
3333
|
4 * -- LAPACK driver routine (version 3.0) -- |
2329
|
5 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., |
|
6 * Courant Institute, Argonne National Lab, and Rice University |
3596
|
7 * October 31, 1999 |
2329
|
8 * |
|
9 * .. Scalar Arguments .. |
|
10 INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK |
|
11 DOUBLE PRECISION RCOND |
|
12 * .. |
|
13 * .. Array Arguments .. |
|
14 DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) |
|
15 * .. |
|
16 * |
|
17 * Purpose |
|
18 * ======= |
|
19 * |
|
20 * DGELSS computes the minimum norm solution to a real linear least |
|
21 * squares problem: |
|
22 * |
|
23 * Minimize 2-norm(| b - A*x |). |
|
24 * |
|
25 * using the singular value decomposition (SVD) of A. A is an M-by-N |
|
26 * matrix which may be rank-deficient. |
|
27 * |
|
28 * Several right hand side vectors b and solution vectors x can be |
|
29 * handled in a single call; they are stored as the columns of the |
|
30 * M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix |
|
31 * X. |
|
32 * |
|
33 * The effective rank of A is determined by treating as zero those |
|
34 * singular values which are less than RCOND times the largest singular |
|
35 * value. |
|
36 * |
|
37 * Arguments |
|
38 * ========= |
|
39 * |
|
40 * M (input) INTEGER |
|
41 * The number of rows of the matrix A. M >= 0. |
|
42 * |
|
43 * N (input) INTEGER |
|
44 * The number of columns of the matrix A. N >= 0. |
|
45 * |
|
46 * NRHS (input) INTEGER |
|
47 * The number of right hand sides, i.e., the number of columns |
|
48 * of the matrices B and X. NRHS >= 0. |
|
49 * |
|
50 * A (input/output) DOUBLE PRECISION array, dimension (LDA,N) |
|
51 * On entry, the M-by-N matrix A. |
|
52 * On exit, the first min(m,n) rows of A are overwritten with |
|
53 * its right singular vectors, stored rowwise. |
|
54 * |
|
55 * LDA (input) INTEGER |
|
56 * The leading dimension of the array A. LDA >= max(1,M). |
|
57 * |
|
58 * B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) |
|
59 * On entry, the M-by-NRHS right hand side matrix B. |
|
60 * On exit, B is overwritten by the N-by-NRHS solution |
|
61 * matrix X. If m >= n and RANK = n, the residual |
|
62 * sum-of-squares for the solution in the i-th column is given |
|
63 * by the sum of squares of elements n+1:m in that column. |
|
64 * |
|
65 * LDB (input) INTEGER |
|
66 * The leading dimension of the array B. LDB >= max(1,max(M,N)). |
|
67 * |
|
68 * S (output) DOUBLE PRECISION array, dimension (min(M,N)) |
|
69 * The singular values of A in decreasing order. |
|
70 * The condition number of A in the 2-norm = S(1)/S(min(m,n)). |
|
71 * |
|
72 * RCOND (input) DOUBLE PRECISION |
|
73 * RCOND is used to determine the effective rank of A. |
|
74 * Singular values S(i) <= RCOND*S(1) are treated as zero. |
|
75 * If RCOND < 0, machine precision is used instead. |
|
76 * |
|
77 * RANK (output) INTEGER |
|
78 * The effective rank of A, i.e., the number of singular values |
|
79 * which are greater than RCOND*S(1). |
|
80 * |
|
81 * WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) |
|
82 * On exit, if INFO = 0, WORK(1) returns the optimal LWORK. |
|
83 * |
|
84 * LWORK (input) INTEGER |
|
85 * The dimension of the array WORK. LWORK >= 1, and also: |
|
86 * LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS ) |
|
87 * For good performance, LWORK should generally be larger. |
|
88 * |
3333
|
89 * If LWORK = -1, then a workspace query is assumed; the routine |
|
90 * only calculates the optimal size of the WORK array, returns |
|
91 * this value as the first entry of the WORK array, and no error |
|
92 * message related to LWORK is issued by XERBLA. |
|
93 * |
2329
|
94 * INFO (output) INTEGER |
|
95 * = 0: successful exit |
|
96 * < 0: if INFO = -i, the i-th argument had an illegal value. |
|
97 * > 0: the algorithm for computing the SVD failed to converge; |
|
98 * if INFO = i, i off-diagonal elements of an intermediate |
|
99 * bidiagonal form did not converge to zero. |
|
100 * |
|
101 * ===================================================================== |
|
102 * |
|
103 * .. Parameters .. |
|
104 DOUBLE PRECISION ZERO, ONE |
3333
|
105 PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) |
2329
|
106 * .. |
|
107 * .. Local Scalars .. |
3333
|
108 LOGICAL LQUERY |
2329
|
109 INTEGER BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL, |
|
110 $ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN, |
|
111 $ MAXWRK, MINMN, MINWRK, MM, MNTHR |
|
112 DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR |
|
113 * .. |
|
114 * .. Local Arrays .. |
|
115 DOUBLE PRECISION VDUM( 1 ) |
|
116 * .. |
|
117 * .. External Subroutines .. |
|
118 EXTERNAL DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV, |
|
119 $ DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR, |
|
120 $ DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA |
|
121 * .. |
|
122 * .. External Functions .. |
|
123 INTEGER ILAENV |
|
124 DOUBLE PRECISION DLAMCH, DLANGE |
|
125 EXTERNAL ILAENV, DLAMCH, DLANGE |
|
126 * .. |
|
127 * .. Intrinsic Functions .. |
|
128 INTRINSIC MAX, MIN |
|
129 * .. |
|
130 * .. Executable Statements .. |
|
131 * |
|
132 * Test the input arguments |
|
133 * |
|
134 INFO = 0 |
|
135 MINMN = MIN( M, N ) |
|
136 MAXMN = MAX( M, N ) |
|
137 MNTHR = ILAENV( 6, 'DGELSS', ' ', M, N, NRHS, -1 ) |
3333
|
138 LQUERY = ( LWORK.EQ.-1 ) |
2329
|
139 IF( M.LT.0 ) THEN |
|
140 INFO = -1 |
|
141 ELSE IF( N.LT.0 ) THEN |
|
142 INFO = -2 |
|
143 ELSE IF( NRHS.LT.0 ) THEN |
|
144 INFO = -3 |
|
145 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN |
|
146 INFO = -5 |
|
147 ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN |
|
148 INFO = -7 |
|
149 END IF |
|
150 * |
|
151 * Compute workspace |
|
152 * (Note: Comments in the code beginning "Workspace:" describe the |
|
153 * minimal amount of workspace needed at that point in the code, |
|
154 * as well as the preferred amount for good performance. |
|
155 * NB refers to the optimal block size for the immediately |
|
156 * following subroutine, as returned by ILAENV.) |
|
157 * |
|
158 MINWRK = 1 |
3333
|
159 IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN |
2329
|
160 MAXWRK = 0 |
|
161 MM = M |
|
162 IF( M.GE.N .AND. M.GE.MNTHR ) THEN |
|
163 * |
|
164 * Path 1a - overdetermined, with many more rows than columns |
|
165 * |
|
166 MM = N |
|
167 MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'DGEQRF', ' ', M, N, |
|
168 $ -1, -1 ) ) |
|
169 MAXWRK = MAX( MAXWRK, N+NRHS* |
|
170 $ ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N, -1 ) ) |
|
171 END IF |
|
172 IF( M.GE.N ) THEN |
|
173 * |
|
174 * Path 1 - overdetermined or exactly determined |
|
175 * |
3333
|
176 * Compute workspace needed for DBDSQR |
2329
|
177 * |
3596
|
178 BDSPAC = MAX( 1, 5*N ) |
2329
|
179 MAXWRK = MAX( MAXWRK, 3*N+( MM+N )* |
|
180 $ ILAENV( 1, 'DGEBRD', ' ', MM, N, -1, -1 ) ) |
|
181 MAXWRK = MAX( MAXWRK, 3*N+NRHS* |
|
182 $ ILAENV( 1, 'DORMBR', 'QLT', MM, NRHS, N, -1 ) ) |
|
183 MAXWRK = MAX( MAXWRK, 3*N+( N-1 )* |
|
184 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) ) |
|
185 MAXWRK = MAX( MAXWRK, BDSPAC ) |
|
186 MAXWRK = MAX( MAXWRK, N*NRHS ) |
|
187 MINWRK = MAX( 3*N+MM, 3*N+NRHS, BDSPAC ) |
|
188 MAXWRK = MAX( MINWRK, MAXWRK ) |
|
189 END IF |
|
190 IF( N.GT.M ) THEN |
|
191 * |
3333
|
192 * Compute workspace needed for DBDSQR |
2329
|
193 * |
3596
|
194 BDSPAC = MAX( 1, 5*M ) |
2329
|
195 MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC ) |
|
196 IF( N.GE.MNTHR ) THEN |
|
197 * |
|
198 * Path 2a - underdetermined, with many more columns |
|
199 * than rows |
|
200 * |
|
201 MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) |
|
202 MAXWRK = MAX( MAXWRK, M*M+4*M+2*M* |
|
203 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) |
|
204 MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS* |
|
205 $ ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) ) |
|
206 MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )* |
|
207 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) ) |
|
208 MAXWRK = MAX( MAXWRK, M*M+M+BDSPAC ) |
|
209 IF( NRHS.GT.1 ) THEN |
|
210 MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS ) |
|
211 ELSE |
|
212 MAXWRK = MAX( MAXWRK, M*M+2*M ) |
|
213 END IF |
|
214 MAXWRK = MAX( MAXWRK, M+NRHS* |
|
215 $ ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M, -1 ) ) |
|
216 ELSE |
|
217 * |
|
218 * Path 2 - underdetermined |
|
219 * |
|
220 MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'DGEBRD', ' ', M, N, |
|
221 $ -1, -1 ) |
|
222 MAXWRK = MAX( MAXWRK, 3*M+NRHS* |
|
223 $ ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) ) |
|
224 MAXWRK = MAX( MAXWRK, 3*M+M* |
|
225 $ ILAENV( 1, 'DORGBR', 'P', M, N, M, -1 ) ) |
|
226 MAXWRK = MAX( MAXWRK, BDSPAC ) |
|
227 MAXWRK = MAX( MAXWRK, N*NRHS ) |
|
228 END IF |
|
229 END IF |
|
230 MAXWRK = MAX( MINWRK, MAXWRK ) |
|
231 WORK( 1 ) = MAXWRK |
|
232 END IF |
|
233 * |
|
234 MINWRK = MAX( MINWRK, 1 ) |
3333
|
235 IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) |
2329
|
236 $ INFO = -12 |
|
237 IF( INFO.NE.0 ) THEN |
|
238 CALL XERBLA( 'DGELSS', -INFO ) |
|
239 RETURN |
3333
|
240 ELSE IF( LQUERY ) THEN |
|
241 RETURN |
2329
|
242 END IF |
|
243 * |
|
244 * Quick return if possible |
|
245 * |
|
246 IF( M.EQ.0 .OR. N.EQ.0 ) THEN |
|
247 RANK = 0 |
|
248 RETURN |
|
249 END IF |
|
250 * |
|
251 * Get machine parameters |
|
252 * |
|
253 EPS = DLAMCH( 'P' ) |
|
254 SFMIN = DLAMCH( 'S' ) |
|
255 SMLNUM = SFMIN / EPS |
|
256 BIGNUM = ONE / SMLNUM |
|
257 CALL DLABAD( SMLNUM, BIGNUM ) |
|
258 * |
|
259 * Scale A if max element outside range [SMLNUM,BIGNUM] |
|
260 * |
|
261 ANRM = DLANGE( 'M', M, N, A, LDA, WORK ) |
|
262 IASCL = 0 |
|
263 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN |
|
264 * |
|
265 * Scale matrix norm up to SMLNUM |
|
266 * |
|
267 CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) |
|
268 IASCL = 1 |
|
269 ELSE IF( ANRM.GT.BIGNUM ) THEN |
|
270 * |
|
271 * Scale matrix norm down to BIGNUM |
|
272 * |
|
273 CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) |
|
274 IASCL = 2 |
|
275 ELSE IF( ANRM.EQ.ZERO ) THEN |
|
276 * |
|
277 * Matrix all zero. Return zero solution. |
|
278 * |
|
279 CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) |
|
280 CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) |
|
281 RANK = 0 |
|
282 GO TO 70 |
|
283 END IF |
|
284 * |
|
285 * Scale B if max element outside range [SMLNUM,BIGNUM] |
|
286 * |
|
287 BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK ) |
|
288 IBSCL = 0 |
|
289 IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN |
|
290 * |
|
291 * Scale matrix norm up to SMLNUM |
|
292 * |
|
293 CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) |
|
294 IBSCL = 1 |
|
295 ELSE IF( BNRM.GT.BIGNUM ) THEN |
|
296 * |
|
297 * Scale matrix norm down to BIGNUM |
|
298 * |
|
299 CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) |
|
300 IBSCL = 2 |
|
301 END IF |
|
302 * |
|
303 * Overdetermined case |
|
304 * |
|
305 IF( M.GE.N ) THEN |
|
306 * |
|
307 * Path 1 - overdetermined or exactly determined |
|
308 * |
|
309 MM = M |
|
310 IF( M.GE.MNTHR ) THEN |
|
311 * |
|
312 * Path 1a - overdetermined, with many more rows than columns |
|
313 * |
|
314 MM = N |
|
315 ITAU = 1 |
|
316 IWORK = ITAU + N |
|
317 * |
|
318 * Compute A=Q*R |
|
319 * (Workspace: need 2*N, prefer N+N*NB) |
|
320 * |
|
321 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), |
|
322 $ LWORK-IWORK+1, INFO ) |
|
323 * |
|
324 * Multiply B by transpose(Q) |
|
325 * (Workspace: need N+NRHS, prefer N+NRHS*NB) |
|
326 * |
|
327 CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B, |
|
328 $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) |
|
329 * |
|
330 * Zero out below R |
|
331 * |
|
332 IF( N.GT.1 ) |
|
333 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) |
|
334 END IF |
|
335 * |
|
336 IE = 1 |
|
337 ITAUQ = IE + N |
|
338 ITAUP = ITAUQ + N |
|
339 IWORK = ITAUP + N |
|
340 * |
|
341 * Bidiagonalize R in A |
|
342 * (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) |
|
343 * |
|
344 CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), |
|
345 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, |
|
346 $ INFO ) |
|
347 * |
|
348 * Multiply B by transpose of left bidiagonalizing vectors of R |
|
349 * (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) |
|
350 * |
|
351 CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ), |
|
352 $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) |
|
353 * |
|
354 * Generate right bidiagonalizing vectors of R in A |
|
355 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) |
|
356 * |
|
357 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), |
|
358 $ WORK( IWORK ), LWORK-IWORK+1, INFO ) |
|
359 IWORK = IE + N |
|
360 * |
|
361 * Perform bidiagonal QR iteration |
|
362 * multiply B by transpose of left singular vectors |
|
363 * compute right singular vectors in A |
|
364 * (Workspace: need BDSPAC) |
|
365 * |
|
366 CALL DBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM, |
|
367 $ 1, B, LDB, WORK( IWORK ), INFO ) |
|
368 IF( INFO.NE.0 ) |
|
369 $ GO TO 70 |
|
370 * |
|
371 * Multiply B by reciprocals of singular values |
|
372 * |
|
373 THR = MAX( RCOND*S( 1 ), SFMIN ) |
|
374 IF( RCOND.LT.ZERO ) |
|
375 $ THR = MAX( EPS*S( 1 ), SFMIN ) |
|
376 RANK = 0 |
|
377 DO 10 I = 1, N |
|
378 IF( S( I ).GT.THR ) THEN |
|
379 CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB ) |
|
380 RANK = RANK + 1 |
|
381 ELSE |
|
382 CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) |
|
383 END IF |
|
384 10 CONTINUE |
|
385 * |
|
386 * Multiply B by right singular vectors |
|
387 * (Workspace: need N, prefer N*NRHS) |
|
388 * |
|
389 IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN |
|
390 CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO, |
|
391 $ WORK, LDB ) |
|
392 CALL DLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) |
|
393 ELSE IF( NRHS.GT.1 ) THEN |
|
394 CHUNK = LWORK / N |
|
395 DO 20 I = 1, NRHS, CHUNK |
|
396 BL = MIN( NRHS-I+1, CHUNK ) |
3333
|
397 CALL DGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ), |
|
398 $ LDB, ZERO, WORK, N ) |
|
399 CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) |
2329
|
400 20 CONTINUE |
|
401 ELSE |
|
402 CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) |
|
403 CALL DCOPY( N, WORK, 1, B, 1 ) |
|
404 END IF |
|
405 * |
|
406 ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ |
|
407 $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN |
|
408 * |
|
409 * Path 2a - underdetermined, with many more columns than rows |
|
410 * and sufficient workspace for an efficient algorithm |
|
411 * |
|
412 LDWORK = M |
|
413 IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), |
|
414 $ M*LDA+M+M*NRHS ) )LDWORK = LDA |
|
415 ITAU = 1 |
|
416 IWORK = M + 1 |
|
417 * |
|
418 * Compute A=L*Q |
|
419 * (Workspace: need 2*M, prefer M+M*NB) |
|
420 * |
|
421 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), |
|
422 $ LWORK-IWORK+1, INFO ) |
|
423 IL = IWORK |
|
424 * |
|
425 * Copy L to WORK(IL), zeroing out above it |
|
426 * |
|
427 CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) |
|
428 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ), |
|
429 $ LDWORK ) |
|
430 IE = IL + LDWORK*M |
|
431 ITAUQ = IE + M |
|
432 ITAUP = ITAUQ + M |
|
433 IWORK = ITAUP + M |
|
434 * |
|
435 * Bidiagonalize L in WORK(IL) |
|
436 * (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) |
|
437 * |
|
438 CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ), |
|
439 $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ), |
|
440 $ LWORK-IWORK+1, INFO ) |
|
441 * |
|
442 * Multiply B by transpose of left bidiagonalizing vectors of L |
|
443 * (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) |
|
444 * |
|
445 CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK, |
|
446 $ WORK( ITAUQ ), B, LDB, WORK( IWORK ), |
|
447 $ LWORK-IWORK+1, INFO ) |
|
448 * |
|
449 * Generate right bidiagonalizing vectors of R in WORK(IL) |
|
450 * (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) |
|
451 * |
|
452 CALL DORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ), |
|
453 $ WORK( IWORK ), LWORK-IWORK+1, INFO ) |
|
454 IWORK = IE + M |
|
455 * |
|
456 * Perform bidiagonal QR iteration, |
|
457 * computing right singular vectors of L in WORK(IL) and |
|
458 * multiplying B by transpose of left singular vectors |
|
459 * (Workspace: need M*M+M+BDSPAC) |
|
460 * |
|
461 CALL DBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ), |
|
462 $ LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO ) |
|
463 IF( INFO.NE.0 ) |
|
464 $ GO TO 70 |
|
465 * |
|
466 * Multiply B by reciprocals of singular values |
|
467 * |
|
468 THR = MAX( RCOND*S( 1 ), SFMIN ) |
|
469 IF( RCOND.LT.ZERO ) |
|
470 $ THR = MAX( EPS*S( 1 ), SFMIN ) |
|
471 RANK = 0 |
|
472 DO 30 I = 1, M |
|
473 IF( S( I ).GT.THR ) THEN |
|
474 CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB ) |
|
475 RANK = RANK + 1 |
|
476 ELSE |
|
477 CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) |
|
478 END IF |
|
479 30 CONTINUE |
|
480 IWORK = IE |
|
481 * |
|
482 * Multiply B by right singular vectors of L in WORK(IL) |
|
483 * (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) |
|
484 * |
|
485 IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN |
|
486 CALL DGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK, |
|
487 $ B, LDB, ZERO, WORK( IWORK ), LDB ) |
|
488 CALL DLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB ) |
|
489 ELSE IF( NRHS.GT.1 ) THEN |
|
490 CHUNK = ( LWORK-IWORK+1 ) / M |
|
491 DO 40 I = 1, NRHS, CHUNK |
|
492 BL = MIN( NRHS-I+1, CHUNK ) |
|
493 CALL DGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK, |
3754
|
494 $ B( 1, I ), LDB, ZERO, WORK( IWORK ), M ) |
|
495 CALL DLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ), |
3333
|
496 $ LDB ) |
2329
|
497 40 CONTINUE |
|
498 ELSE |
|
499 CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ), |
|
500 $ 1, ZERO, WORK( IWORK ), 1 ) |
|
501 CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) |
|
502 END IF |
|
503 * |
|
504 * Zero out below first M rows of B |
|
505 * |
|
506 CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) |
|
507 IWORK = ITAU + M |
|
508 * |
|
509 * Multiply transpose(Q) by B |
|
510 * (Workspace: need M+NRHS, prefer M+NRHS*NB) |
|
511 * |
|
512 CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B, |
|
513 $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) |
|
514 * |
|
515 ELSE |
|
516 * |
|
517 * Path 2 - remaining underdetermined cases |
|
518 * |
|
519 IE = 1 |
|
520 ITAUQ = IE + M |
|
521 ITAUP = ITAUQ + M |
|
522 IWORK = ITAUP + M |
|
523 * |
|
524 * Bidiagonalize A |
|
525 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) |
|
526 * |
|
527 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), |
|
528 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, |
|
529 $ INFO ) |
|
530 * |
|
531 * Multiply B by transpose of left bidiagonalizing vectors |
|
532 * (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) |
|
533 * |
|
534 CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ), |
|
535 $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) |
|
536 * |
|
537 * Generate right bidiagonalizing vectors in A |
|
538 * (Workspace: need 4*M, prefer 3*M+M*NB) |
|
539 * |
|
540 CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), |
|
541 $ WORK( IWORK ), LWORK-IWORK+1, INFO ) |
|
542 IWORK = IE + M |
|
543 * |
|
544 * Perform bidiagonal QR iteration, |
|
545 * computing right singular vectors of A in A and |
|
546 * multiplying B by transpose of left singular vectors |
|
547 * (Workspace: need BDSPAC) |
|
548 * |
|
549 CALL DBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM, |
|
550 $ 1, B, LDB, WORK( IWORK ), INFO ) |
|
551 IF( INFO.NE.0 ) |
|
552 $ GO TO 70 |
|
553 * |
|
554 * Multiply B by reciprocals of singular values |
|
555 * |
|
556 THR = MAX( RCOND*S( 1 ), SFMIN ) |
|
557 IF( RCOND.LT.ZERO ) |
|
558 $ THR = MAX( EPS*S( 1 ), SFMIN ) |
|
559 RANK = 0 |
|
560 DO 50 I = 1, M |
|
561 IF( S( I ).GT.THR ) THEN |
|
562 CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB ) |
|
563 RANK = RANK + 1 |
|
564 ELSE |
|
565 CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) |
|
566 END IF |
|
567 50 CONTINUE |
|
568 * |
|
569 * Multiply B by right singular vectors of A |
|
570 * (Workspace: need N, prefer N*NRHS) |
|
571 * |
|
572 IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN |
|
573 CALL DGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO, |
|
574 $ WORK, LDB ) |
|
575 CALL DLACPY( 'F', N, NRHS, WORK, LDB, B, LDB ) |
|
576 ELSE IF( NRHS.GT.1 ) THEN |
|
577 CHUNK = LWORK / N |
|
578 DO 60 I = 1, NRHS, CHUNK |
|
579 BL = MIN( NRHS-I+1, CHUNK ) |
|
580 CALL DGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ), |
|
581 $ LDB, ZERO, WORK, N ) |
|
582 CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) |
|
583 60 CONTINUE |
|
584 ELSE |
|
585 CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) |
|
586 CALL DCOPY( N, WORK, 1, B, 1 ) |
|
587 END IF |
|
588 END IF |
|
589 * |
|
590 * Undo scaling |
|
591 * |
|
592 IF( IASCL.EQ.1 ) THEN |
|
593 CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) |
|
594 CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, |
|
595 $ INFO ) |
|
596 ELSE IF( IASCL.EQ.2 ) THEN |
|
597 CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) |
|
598 CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, |
|
599 $ INFO ) |
|
600 END IF |
|
601 IF( IBSCL.EQ.1 ) THEN |
|
602 CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) |
|
603 ELSE IF( IBSCL.EQ.2 ) THEN |
|
604 CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) |
|
605 END IF |
|
606 * |
|
607 70 CONTINUE |
|
608 WORK( 1 ) = MAXWRK |
|
609 RETURN |
|
610 * |
|
611 * End of DGELSS |
|
612 * |
|
613 END |