comparison libcruft/lapack/dlaqr2.f @ 7034:68db500cb558

[project @ 2007-10-16 18:54:19 by jwe]
author jwe
date Tue, 16 Oct 2007 18:54:23 +0000
parents
children
comparison
equal deleted inserted replaced
7033:f0142f2afdc6 7034:68db500cb558
1 SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
2 $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
3 $ LDT, NV, WV, LDWV, WORK, LWORK )
4 *
5 * -- LAPACK auxiliary routine (version 3.1) --
6 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
7 * November 2006
8 *
9 * .. Scalar Arguments ..
10 INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
11 $ LDZ, LWORK, N, ND, NH, NS, NV, NW
12 LOGICAL WANTT, WANTZ
13 * ..
14 * .. Array Arguments ..
15 DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),
16 $ V( LDV, * ), WORK( * ), WV( LDWV, * ),
17 $ Z( LDZ, * )
18 * ..
19 *
20 * This subroutine is identical to DLAQR3 except that it avoids
21 * recursion by calling DLAHQR instead of DLAQR4.
22 *
23 *
24 * ******************************************************************
25 * Aggressive early deflation:
26 *
27 * This subroutine accepts as input an upper Hessenberg matrix
28 * H and performs an orthogonal similarity transformation
29 * designed to detect and deflate fully converged eigenvalues from
30 * a trailing principal submatrix. On output H has been over-
31 * written by a new Hessenberg matrix that is a perturbation of
32 * an orthogonal similarity transformation of H. It is to be
33 * hoped that the final version of H has many zero subdiagonal
34 * entries.
35 *
36 * ******************************************************************
37 * WANTT (input) LOGICAL
38 * If .TRUE., then the Hessenberg matrix H is fully updated
39 * so that the quasi-triangular Schur factor may be
40 * computed (in cooperation with the calling subroutine).
41 * If .FALSE., then only enough of H is updated to preserve
42 * the eigenvalues.
43 *
44 * WANTZ (input) LOGICAL
45 * If .TRUE., then the orthogonal matrix Z is updated so
46 * so that the orthogonal Schur factor may be computed
47 * (in cooperation with the calling subroutine).
48 * If .FALSE., then Z is not referenced.
49 *
50 * N (input) INTEGER
51 * The order of the matrix H and (if WANTZ is .TRUE.) the
52 * order of the orthogonal matrix Z.
53 *
54 * KTOP (input) INTEGER
55 * It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
56 * KBOT and KTOP together determine an isolated block
57 * along the diagonal of the Hessenberg matrix.
58 *
59 * KBOT (input) INTEGER
60 * It is assumed without a check that either
61 * KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
62 * determine an isolated block along the diagonal of the
63 * Hessenberg matrix.
64 *
65 * NW (input) INTEGER
66 * Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
67 *
68 * H (input/output) DOUBLE PRECISION array, dimension (LDH,N)
69 * On input the initial N-by-N section of H stores the
70 * Hessenberg matrix undergoing aggressive early deflation.
71 * On output H has been transformed by an orthogonal
72 * similarity transformation, perturbed, and the returned
73 * to Hessenberg form that (it is to be hoped) has some
74 * zero subdiagonal entries.
75 *
76 * LDH (input) integer
77 * Leading dimension of H just as declared in the calling
78 * subroutine. N .LE. LDH
79 *
80 * ILOZ (input) INTEGER
81 * IHIZ (input) INTEGER
82 * Specify the rows of Z to which transformations must be
83 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
84 *
85 * Z (input/output) DOUBLE PRECISION array, dimension (LDZ,IHI)
86 * IF WANTZ is .TRUE., then on output, the orthogonal
87 * similarity transformation mentioned above has been
88 * accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
89 * If WANTZ is .FALSE., then Z is unreferenced.
90 *
91 * LDZ (input) integer
92 * The leading dimension of Z just as declared in the
93 * calling subroutine. 1 .LE. LDZ.
94 *
95 * NS (output) integer
96 * The number of unconverged (ie approximate) eigenvalues
97 * returned in SR and SI that may be used as shifts by the
98 * calling subroutine.
99 *
100 * ND (output) integer
101 * The number of converged eigenvalues uncovered by this
102 * subroutine.
103 *
104 * SR (output) DOUBLE PRECISION array, dimension KBOT
105 * SI (output) DOUBLE PRECISION array, dimension KBOT
106 * On output, the real and imaginary parts of approximate
107 * eigenvalues that may be used for shifts are stored in
108 * SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
109 * SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
110 * The real and imaginary parts of converged eigenvalues
111 * are stored in SR(KBOT-ND+1) through SR(KBOT) and
112 * SI(KBOT-ND+1) through SI(KBOT), respectively.
113 *
114 * V (workspace) DOUBLE PRECISION array, dimension (LDV,NW)
115 * An NW-by-NW work array.
116 *
117 * LDV (input) integer scalar
118 * The leading dimension of V just as declared in the
119 * calling subroutine. NW .LE. LDV
120 *
121 * NH (input) integer scalar
122 * The number of columns of T. NH.GE.NW.
123 *
124 * T (workspace) DOUBLE PRECISION array, dimension (LDT,NW)
125 *
126 * LDT (input) integer
127 * The leading dimension of T just as declared in the
128 * calling subroutine. NW .LE. LDT
129 *
130 * NV (input) integer
131 * The number of rows of work array WV available for
132 * workspace. NV.GE.NW.
133 *
134 * WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW)
135 *
136 * LDWV (input) integer
137 * The leading dimension of W just as declared in the
138 * calling subroutine. NW .LE. LDV
139 *
140 * WORK (workspace) DOUBLE PRECISION array, dimension LWORK.
141 * On exit, WORK(1) is set to an estimate of the optimal value
142 * of LWORK for the given values of N, NW, KTOP and KBOT.
143 *
144 * LWORK (input) integer
145 * The dimension of the work array WORK. LWORK = 2*NW
146 * suffices, but greater efficiency may result from larger
147 * values of LWORK.
148 *
149 * If LWORK = -1, then a workspace query is assumed; DLAQR2
150 * only estimates the optimal workspace size for the given
151 * values of N, NW, KTOP and KBOT. The estimate is returned
152 * in WORK(1). No error message related to LWORK is issued
153 * by XERBLA. Neither H nor Z are accessed.
154 *
155 * ================================================================
156 * Based on contributions by
157 * Karen Braman and Ralph Byers, Department of Mathematics,
158 * University of Kansas, USA
159 *
160 * ================================================================
161 * .. Parameters ..
162 DOUBLE PRECISION ZERO, ONE
163 PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 )
164 * ..
165 * .. Local Scalars ..
166 DOUBLE PRECISION AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
167 $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP
168 INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
169 $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2,
170 $ LWKOPT
171 LOGICAL BULGE, SORTED
172 * ..
173 * .. External Functions ..
174 DOUBLE PRECISION DLAMCH
175 EXTERNAL DLAMCH
176 * ..
177 * .. External Subroutines ..
178 EXTERNAL DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR,
179 $ DLANV2, DLARF, DLARFG, DLASET, DORGHR, DTREXC
180 * ..
181 * .. Intrinsic Functions ..
182 INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT
183 * ..
184 * .. Executable Statements ..
185 *
186 * ==== Estimate optimal workspace. ====
187 *
188 JW = MIN( NW, KBOT-KTOP+1 )
189 IF( JW.LE.2 ) THEN
190 LWKOPT = 1
191 ELSE
192 *
193 * ==== Workspace query call to DGEHRD ====
194 *
195 CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
196 LWK1 = INT( WORK( 1 ) )
197 *
198 * ==== Workspace query call to DORGHR ====
199 *
200 CALL DORGHR( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
201 LWK2 = INT( WORK( 1 ) )
202 *
203 * ==== Optimal workspace ====
204 *
205 LWKOPT = JW + MAX( LWK1, LWK2 )
206 END IF
207 *
208 * ==== Quick return in case of workspace query. ====
209 *
210 IF( LWORK.EQ.-1 ) THEN
211 WORK( 1 ) = DBLE( LWKOPT )
212 RETURN
213 END IF
214 *
215 * ==== Nothing to do ...
216 * ... for an empty active block ... ====
217 NS = 0
218 ND = 0
219 IF( KTOP.GT.KBOT )
220 $ RETURN
221 * ... nor for an empty deflation window. ====
222 IF( NW.LT.1 )
223 $ RETURN
224 *
225 * ==== Machine constants ====
226 *
227 SAFMIN = DLAMCH( 'SAFE MINIMUM' )
228 SAFMAX = ONE / SAFMIN
229 CALL DLABAD( SAFMIN, SAFMAX )
230 ULP = DLAMCH( 'PRECISION' )
231 SMLNUM = SAFMIN*( DBLE( N ) / ULP )
232 *
233 * ==== Setup deflation window ====
234 *
235 JW = MIN( NW, KBOT-KTOP+1 )
236 KWTOP = KBOT - JW + 1
237 IF( KWTOP.EQ.KTOP ) THEN
238 S = ZERO
239 ELSE
240 S = H( KWTOP, KWTOP-1 )
241 END IF
242 *
243 IF( KBOT.EQ.KWTOP ) THEN
244 *
245 * ==== 1-by-1 deflation window: not much to do ====
246 *
247 SR( KWTOP ) = H( KWTOP, KWTOP )
248 SI( KWTOP ) = ZERO
249 NS = 1
250 ND = 0
251 IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )
252 $ THEN
253 NS = 0
254 ND = 1
255 IF( KWTOP.GT.KTOP )
256 $ H( KWTOP, KWTOP-1 ) = ZERO
257 END IF
258 RETURN
259 END IF
260 *
261 * ==== Convert to spike-triangular form. (In case of a
262 * . rare QR failure, this routine continues to do
263 * . aggressive early deflation using that part of
264 * . the deflation window that converged using INFQR
265 * . here and there to keep track.) ====
266 *
267 CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
268 CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
269 *
270 CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
271 CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
272 $ SI( KWTOP ), 1, JW, V, LDV, INFQR )
273 *
274 * ==== DTREXC needs a clean margin near the diagonal ====
275 *
276 DO 10 J = 1, JW - 3
277 T( J+2, J ) = ZERO
278 T( J+3, J ) = ZERO
279 10 CONTINUE
280 IF( JW.GT.2 )
281 $ T( JW, JW-2 ) = ZERO
282 *
283 * ==== Deflation detection loop ====
284 *
285 NS = JW
286 ILST = INFQR + 1
287 20 CONTINUE
288 IF( ILST.LE.NS ) THEN
289 IF( NS.EQ.1 ) THEN
290 BULGE = .FALSE.
291 ELSE
292 BULGE = T( NS, NS-1 ).NE.ZERO
293 END IF
294 *
295 * ==== Small spike tip test for deflation ====
296 *
297 IF( .NOT.BULGE ) THEN
298 *
299 * ==== Real eigenvalue ====
300 *
301 FOO = ABS( T( NS, NS ) )
302 IF( FOO.EQ.ZERO )
303 $ FOO = ABS( S )
304 IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
305 *
306 * ==== Deflatable ====
307 *
308 NS = NS - 1
309 ELSE
310 *
311 * ==== Undeflatable. Move it up out of the way.
312 * . (DTREXC can not fail in this case.) ====
313 *
314 IFST = NS
315 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
316 $ INFO )
317 ILST = ILST + 1
318 END IF
319 ELSE
320 *
321 * ==== Complex conjugate pair ====
322 *
323 FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )*
324 $ SQRT( ABS( T( NS-1, NS ) ) )
325 IF( FOO.EQ.ZERO )
326 $ FOO = ABS( S )
327 IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.
328 $ MAX( SMLNUM, ULP*FOO ) ) THEN
329 *
330 * ==== Deflatable ====
331 *
332 NS = NS - 2
333 ELSE
334 *
335 * ==== Undflatable. Move them up out of the way.
336 * . Fortunately, DTREXC does the right thing with
337 * . ILST in case of a rare exchange failure. ====
338 *
339 IFST = NS
340 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
341 $ INFO )
342 ILST = ILST + 2
343 END IF
344 END IF
345 *
346 * ==== End deflation detection loop ====
347 *
348 GO TO 20
349 END IF
350 *
351 * ==== Return to Hessenberg form ====
352 *
353 IF( NS.EQ.0 )
354 $ S = ZERO
355 *
356 IF( NS.LT.JW ) THEN
357 *
358 * ==== sorting diagonal blocks of T improves accuracy for
359 * . graded matrices. Bubble sort deals well with
360 * . exchange failures. ====
361 *
362 SORTED = .false.
363 I = NS + 1
364 30 CONTINUE
365 IF( SORTED )
366 $ GO TO 50
367 SORTED = .true.
368 *
369 KEND = I - 1
370 I = INFQR + 1
371 IF( I.EQ.NS ) THEN
372 K = I + 1
373 ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
374 K = I + 1
375 ELSE
376 K = I + 2
377 END IF
378 40 CONTINUE
379 IF( K.LE.KEND ) THEN
380 IF( K.EQ.I+1 ) THEN
381 EVI = ABS( T( I, I ) )
382 ELSE
383 EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )*
384 $ SQRT( ABS( T( I, I+1 ) ) )
385 END IF
386 *
387 IF( K.EQ.KEND ) THEN
388 EVK = ABS( T( K, K ) )
389 ELSE IF( T( K+1, K ).EQ.ZERO ) THEN
390 EVK = ABS( T( K, K ) )
391 ELSE
392 EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )*
393 $ SQRT( ABS( T( K, K+1 ) ) )
394 END IF
395 *
396 IF( EVI.GE.EVK ) THEN
397 I = K
398 ELSE
399 SORTED = .false.
400 IFST = I
401 ILST = K
402 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
403 $ INFO )
404 IF( INFO.EQ.0 ) THEN
405 I = ILST
406 ELSE
407 I = K
408 END IF
409 END IF
410 IF( I.EQ.KEND ) THEN
411 K = I + 1
412 ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
413 K = I + 1
414 ELSE
415 K = I + 2
416 END IF
417 GO TO 40
418 END IF
419 GO TO 30
420 50 CONTINUE
421 END IF
422 *
423 * ==== Restore shift/eigenvalue array from T ====
424 *
425 I = JW
426 60 CONTINUE
427 IF( I.GE.INFQR+1 ) THEN
428 IF( I.EQ.INFQR+1 ) THEN
429 SR( KWTOP+I-1 ) = T( I, I )
430 SI( KWTOP+I-1 ) = ZERO
431 I = I - 1
432 ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN
433 SR( KWTOP+I-1 ) = T( I, I )
434 SI( KWTOP+I-1 ) = ZERO
435 I = I - 1
436 ELSE
437 AA = T( I-1, I-1 )
438 CC = T( I, I-1 )
439 BB = T( I-1, I )
440 DD = T( I, I )
441 CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),
442 $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),
443 $ SI( KWTOP+I-1 ), CS, SN )
444 I = I - 2
445 END IF
446 GO TO 60
447 END IF
448 *
449 IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
450 IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
451 *
452 * ==== Reflect spike back into lower triangle ====
453 *
454 CALL DCOPY( NS, V, LDV, WORK, 1 )
455 BETA = WORK( 1 )
456 CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU )
457 WORK( 1 ) = ONE
458 *
459 CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
460 *
461 CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,
462 $ WORK( JW+1 ) )
463 CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
464 $ WORK( JW+1 ) )
465 CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
466 $ WORK( JW+1 ) )
467 *
468 CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
469 $ LWORK-JW, INFO )
470 END IF
471 *
472 * ==== Copy updated reduced window into place ====
473 *
474 IF( KWTOP.GT.1 )
475 $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 )
476 CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
477 CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
478 $ LDH+1 )
479 *
480 * ==== Accumulate orthogonal matrix in order update
481 * . H and Z, if requested. (A modified version
482 * . of DORGHR that accumulates block Householder
483 * . transformations into V directly might be
484 * . marginally more efficient than the following.) ====
485 *
486 IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
487 CALL DORGHR( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
488 $ LWORK-JW, INFO )
489 CALL DGEMM( 'N', 'N', JW, NS, NS, ONE, V, LDV, T, LDT, ZERO,
490 $ WV, LDWV )
491 CALL DLACPY( 'A', JW, NS, WV, LDWV, V, LDV )
492 END IF
493 *
494 * ==== Update vertical slab in H ====
495 *
496 IF( WANTT ) THEN
497 LTOP = 1
498 ELSE
499 LTOP = KTOP
500 END IF
501 DO 70 KROW = LTOP, KWTOP - 1, NV
502 KLN = MIN( NV, KWTOP-KROW )
503 CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
504 $ LDH, V, LDV, ZERO, WV, LDWV )
505 CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
506 70 CONTINUE
507 *
508 * ==== Update horizontal slab in H ====
509 *
510 IF( WANTT ) THEN
511 DO 80 KCOL = KBOT + 1, N, NH
512 KLN = MIN( NH, N-KCOL+1 )
513 CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
514 $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
515 CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
516 $ LDH )
517 80 CONTINUE
518 END IF
519 *
520 * ==== Update vertical slab in Z ====
521 *
522 IF( WANTZ ) THEN
523 DO 90 KROW = ILOZ, IHIZ, NV
524 KLN = MIN( NV, IHIZ-KROW+1 )
525 CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
526 $ LDZ, V, LDV, ZERO, WV, LDWV )
527 CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
528 $ LDZ )
529 90 CONTINUE
530 END IF
531 END IF
532 *
533 * ==== Return the number of deflations ... ====
534 *
535 ND = JW - NS
536 *
537 * ==== ... and the number of shifts. (Subtracting
538 * . INFQR from the spike length takes care
539 * . of the case of a rare QR failure while
540 * . calculating eigenvalues of the deflation
541 * . window.) ====
542 *
543 NS = NS - INFQR
544 *
545 * ==== Return optimal workspace. ====
546 *
547 WORK( 1 ) = DBLE( LWKOPT )
548 *
549 * ==== End of DLAQR2 ====
550 *
551 END