Mercurial > octave-nkf
comparison libcruft/arpack/src/znaitr.f @ 12274:9f5d2ef078e8 release-3-4-x
import ARPACK sources to libcruft from Debian package libarpack2 2.1+parpack96.dfsg-3+b1
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Fri, 28 Jan 2011 14:04:33 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
12273:83133b5bf392 | 12274:9f5d2ef078e8 |
---|---|
1 c\BeginDoc | |
2 c | |
3 c\Name: znaitr | |
4 c | |
5 c\Description: | |
6 c Reverse communication interface for applying NP additional steps to | |
7 c a K step nonsymmetric Arnoldi factorization. | |
8 c | |
9 c Input: OP*V_{k} - V_{k}*H = r_{k}*e_{k}^T | |
10 c | |
11 c with (V_{k}^T)*B*V_{k} = I, (V_{k}^T)*B*r_{k} = 0. | |
12 c | |
13 c Output: OP*V_{k+p} - V_{k+p}*H = r_{k+p}*e_{k+p}^T | |
14 c | |
15 c with (V_{k+p}^T)*B*V_{k+p} = I, (V_{k+p}^T)*B*r_{k+p} = 0. | |
16 c | |
17 c where OP and B are as in znaupd. The B-norm of r_{k+p} is also | |
18 c computed and returned. | |
19 c | |
20 c\Usage: | |
21 c call znaitr | |
22 c ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH, | |
23 c IPNTR, WORKD, INFO ) | |
24 c | |
25 c\Arguments | |
26 c IDO Integer. (INPUT/OUTPUT) | |
27 c Reverse communication flag. | |
28 c ------------------------------------------------------------- | |
29 c IDO = 0: first call to the reverse communication interface | |
30 c IDO = -1: compute Y = OP * X where | |
31 c IPNTR(1) is the pointer into WORK for X, | |
32 c IPNTR(2) is the pointer into WORK for Y. | |
33 c This is for the restart phase to force the new | |
34 c starting vector into the range of OP. | |
35 c IDO = 1: compute Y = OP * X where | |
36 c IPNTR(1) is the pointer into WORK for X, | |
37 c IPNTR(2) is the pointer into WORK for Y, | |
38 c IPNTR(3) is the pointer into WORK for B * X. | |
39 c IDO = 2: compute Y = B * X where | |
40 c IPNTR(1) is the pointer into WORK for X, | |
41 c IPNTR(2) is the pointer into WORK for Y. | |
42 c IDO = 99: done | |
43 c ------------------------------------------------------------- | |
44 c When the routine is used in the "shift-and-invert" mode, the | |
45 c vector B * Q is already available and do not need to be | |
46 c recomputed in forming OP * Q. | |
47 c | |
48 c BMAT Character*1. (INPUT) | |
49 c BMAT specifies the type of the matrix B that defines the | |
50 c semi-inner product for the operator OP. See znaupd. | |
51 c B = 'I' -> standard eigenvalue problem A*x = lambda*x | |
52 c B = 'G' -> generalized eigenvalue problem A*x = lambda*M**x | |
53 c | |
54 c N Integer. (INPUT) | |
55 c Dimension of the eigenproblem. | |
56 c | |
57 c K Integer. (INPUT) | |
58 c Current size of V and H. | |
59 c | |
60 c NP Integer. (INPUT) | |
61 c Number of additional Arnoldi steps to take. | |
62 c | |
63 c NB Integer. (INPUT) | |
64 c Blocksize to be used in the recurrence. | |
65 c Only work for NB = 1 right now. The goal is to have a | |
66 c program that implement both the block and non-block method. | |
67 c | |
68 c RESID Complex*16 array of length N. (INPUT/OUTPUT) | |
69 c On INPUT: RESID contains the residual vector r_{k}. | |
70 c On OUTPUT: RESID contains the residual vector r_{k+p}. | |
71 c | |
72 c RNORM Double precision scalar. (INPUT/OUTPUT) | |
73 c B-norm of the starting residual on input. | |
74 c B-norm of the updated residual r_{k+p} on output. | |
75 c | |
76 c V Complex*16 N by K+NP array. (INPUT/OUTPUT) | |
77 c On INPUT: V contains the Arnoldi vectors in the first K | |
78 c columns. | |
79 c On OUTPUT: V contains the new NP Arnoldi vectors in the next | |
80 c NP columns. The first K columns are unchanged. | |
81 c | |
82 c LDV Integer. (INPUT) | |
83 c Leading dimension of V exactly as declared in the calling | |
84 c program. | |
85 c | |
86 c H Complex*16 (K+NP) by (K+NP) array. (INPUT/OUTPUT) | |
87 c H is used to store the generated upper Hessenberg matrix. | |
88 c | |
89 c LDH Integer. (INPUT) | |
90 c Leading dimension of H exactly as declared in the calling | |
91 c program. | |
92 c | |
93 c IPNTR Integer array of length 3. (OUTPUT) | |
94 c Pointer to mark the starting locations in the WORK for | |
95 c vectors used by the Arnoldi iteration. | |
96 c ------------------------------------------------------------- | |
97 c IPNTR(1): pointer to the current operand vector X. | |
98 c IPNTR(2): pointer to the current result vector Y. | |
99 c IPNTR(3): pointer to the vector B * X when used in the | |
100 c shift-and-invert mode. X is the current operand. | |
101 c ------------------------------------------------------------- | |
102 c | |
103 c WORKD Complex*16 work array of length 3*N. (REVERSE COMMUNICATION) | |
104 c Distributed array to be used in the basic Arnoldi iteration | |
105 c for reverse communication. The calling program should not | |
106 c use WORKD as temporary workspace during the iteration !!!!!! | |
107 c On input, WORKD(1:N) = B*RESID and is used to save some | |
108 c computation at the first step. | |
109 c | |
110 c INFO Integer. (OUTPUT) | |
111 c = 0: Normal exit. | |
112 c > 0: Size of the spanning invariant subspace of OP found. | |
113 c | |
114 c\EndDoc | |
115 c | |
116 c----------------------------------------------------------------------- | |
117 c | |
118 c\BeginLib | |
119 c | |
120 c\Local variables: | |
121 c xxxxxx Complex*16 | |
122 c | |
123 c\References: | |
124 c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in | |
125 c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), | |
126 c pp 357-385. | |
127 c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly | |
128 c Restarted Arnoldi Iteration", Rice University Technical Report | |
129 c TR95-13, Department of Computational and Applied Mathematics. | |
130 c | |
131 c\Routines called: | |
132 c zgetv0 ARPACK routine to generate the initial vector. | |
133 c ivout ARPACK utility routine that prints integers. | |
134 c arscnd ARPACK utility routine for timing. | |
135 c zmout ARPACK utility routine that prints matrices | |
136 c zvout ARPACK utility routine that prints vectors. | |
137 c zlanhs LAPACK routine that computes various norms of a matrix. | |
138 c zlascl LAPACK routine for careful scaling of a matrix. | |
139 c dlabad LAPACK routine for defining the underflow and overflow | |
140 c limits. | |
141 c dlamch LAPACK routine that determines machine constants. | |
142 c dlapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully. | |
143 c zgemv Level 2 BLAS routine for matrix vector multiplication. | |
144 c zaxpy Level 1 BLAS that computes a vector triad. | |
145 c zcopy Level 1 BLAS that copies one vector to another . | |
146 c zdotc Level 1 BLAS that computes the scalar product of two vectors. | |
147 c zscal Level 1 BLAS that scales a vector. | |
148 c zdscal Level 1 BLAS that scales a complex vector by a real number. | |
149 c dznrm2 Level 1 BLAS that computes the norm of a vector. | |
150 c | |
151 c\Author | |
152 c Danny Sorensen Phuong Vu | |
153 c Richard Lehoucq CRPC / Rice University | |
154 c Dept. of Computational & Houston, Texas | |
155 c Applied Mathematics | |
156 c Rice University | |
157 c Houston, Texas | |
158 c | |
159 c\SCCS Information: @(#) | |
160 c FILE: naitr.F SID: 2.3 DATE OF SID: 8/27/96 RELEASE: 2 | |
161 c | |
162 c\Remarks | |
163 c The algorithm implemented is: | |
164 c | |
165 c restart = .false. | |
166 c Given V_{k} = [v_{1}, ..., v_{k}], r_{k}; | |
167 c r_{k} contains the initial residual vector even for k = 0; | |
168 c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already | |
169 c computed by the calling program. | |
170 c | |
171 c betaj = rnorm ; p_{k+1} = B*r_{k} ; | |
172 c For j = k+1, ..., k+np Do | |
173 c 1) if ( betaj < tol ) stop or restart depending on j. | |
174 c ( At present tol is zero ) | |
175 c if ( restart ) generate a new starting vector. | |
176 c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}]; | |
177 c p_{j} = p_{j}/betaj | |
178 c 3) r_{j} = OP*v_{j} where OP is defined as in znaupd | |
179 c For shift-invert mode p_{j} = B*v_{j} is already available. | |
180 c wnorm = || OP*v_{j} || | |
181 c 4) Compute the j-th step residual vector. | |
182 c w_{j} = V_{j}^T * B * OP * v_{j} | |
183 c r_{j} = OP*v_{j} - V_{j} * w_{j} | |
184 c H(:,j) = w_{j}; | |
185 c H(j,j-1) = rnorm | |
186 c rnorm = || r_(j) || | |
187 c If (rnorm > 0.717*wnorm) accept step and go back to 1) | |
188 c 5) Re-orthogonalization step: | |
189 c s = V_{j}'*B*r_{j} | |
190 c r_{j} = r_{j} - V_{j}*s; rnorm1 = || r_{j} || | |
191 c alphaj = alphaj + s_{j}; | |
192 c 6) Iterative refinement step: | |
193 c If (rnorm1 > 0.717*rnorm) then | |
194 c rnorm = rnorm1 | |
195 c accept step and go back to 1) | |
196 c Else | |
197 c rnorm = rnorm1 | |
198 c If this is the first time in step 6), go to 5) | |
199 c Else r_{j} lies in the span of V_{j} numerically. | |
200 c Set r_{j} = 0 and rnorm = 0; go to 1) | |
201 c EndIf | |
202 c End Do | |
203 c | |
204 c\EndLib | |
205 c | |
206 c----------------------------------------------------------------------- | |
207 c | |
208 subroutine znaitr | |
209 & (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh, | |
210 & ipntr, workd, info) | |
211 c | |
212 c %----------------------------------------------------% | |
213 c | Include files for debugging and timing information | | |
214 c %----------------------------------------------------% | |
215 c | |
216 include 'debug.h' | |
217 include 'stat.h' | |
218 c | |
219 c %------------------% | |
220 c | Scalar Arguments | | |
221 c %------------------% | |
222 c | |
223 character bmat*1 | |
224 integer ido, info, k, ldh, ldv, n, nb, np | |
225 Double precision | |
226 & rnorm | |
227 c | |
228 c %-----------------% | |
229 c | Array Arguments | | |
230 c %-----------------% | |
231 c | |
232 integer ipntr(3) | |
233 Complex*16 | |
234 & h(ldh,k+np), resid(n), v(ldv,k+np), workd(3*n) | |
235 c | |
236 c %------------% | |
237 c | Parameters | | |
238 c %------------% | |
239 c | |
240 Complex*16 | |
241 & one, zero | |
242 Double precision | |
243 & rone, rzero | |
244 parameter (one = (1.0D+0, 0.0D+0), zero = (0.0D+0, 0.0D+0), | |
245 & rone = 1.0D+0, rzero = 0.0D+0) | |
246 c | |
247 c %--------------% | |
248 c | Local Arrays | | |
249 c %--------------% | |
250 c | |
251 Double precision | |
252 & rtemp(2) | |
253 c | |
254 c %---------------% | |
255 c | Local Scalars | | |
256 c %---------------% | |
257 c | |
258 logical first, orth1, orth2, rstart, step3, step4 | |
259 integer ierr, i, infol, ipj, irj, ivj, iter, itry, j, msglvl, | |
260 & jj | |
261 Double precision | |
262 & ovfl, smlnum, tst1, ulp, unfl, betaj, | |
263 & temp1, rnorm1, wnorm | |
264 Complex*16 | |
265 & cnorm | |
266 c | |
267 save first, orth1, orth2, rstart, step3, step4, | |
268 & ierr, ipj, irj, ivj, iter, itry, j, msglvl, ovfl, | |
269 & betaj, rnorm1, smlnum, ulp, unfl, wnorm | |
270 c | |
271 c %----------------------% | |
272 c | External Subroutines | | |
273 c %----------------------% | |
274 c | |
275 external zaxpy, zcopy, zscal, zdscal, zgemv, zgetv0, | |
276 & dlabad, zvout, zmout, ivout, arscnd | |
277 c | |
278 c %--------------------% | |
279 c | External Functions | | |
280 c %--------------------% | |
281 c | |
282 Complex*16 | |
283 & zdotc | |
284 Double precision | |
285 & dlamch, dznrm2, zlanhs, dlapy2 | |
286 external zdotc, dznrm2, zlanhs, dlamch, dlapy2 | |
287 c | |
288 c %---------------------% | |
289 c | Intrinsic Functions | | |
290 c %---------------------% | |
291 c | |
292 intrinsic dimag, dble, max, sqrt | |
293 c | |
294 c %-----------------% | |
295 c | Data statements | | |
296 c %-----------------% | |
297 c | |
298 data first / .true. / | |
299 c | |
300 c %-----------------------% | |
301 c | Executable Statements | | |
302 c %-----------------------% | |
303 c | |
304 if (first) then | |
305 c | |
306 c %-----------------------------------------% | |
307 c | Set machine-dependent constants for the | | |
308 c | the splitting and deflation criterion. | | |
309 c | If norm(H) <= sqrt(OVFL), | | |
310 c | overflow should not occur. | | |
311 c | REFERENCE: LAPACK subroutine zlahqr | | |
312 c %-----------------------------------------% | |
313 c | |
314 unfl = dlamch( 'safe minimum' ) | |
315 ovfl = dble(one / unfl) | |
316 call dlabad( unfl, ovfl ) | |
317 ulp = dlamch( 'precision' ) | |
318 smlnum = unfl*( n / ulp ) | |
319 first = .false. | |
320 end if | |
321 c | |
322 if (ido .eq. 0) then | |
323 c | |
324 c %-------------------------------% | |
325 c | Initialize timing statistics | | |
326 c | & message level for debugging | | |
327 c %-------------------------------% | |
328 c | |
329 call arscnd (t0) | |
330 msglvl = mcaitr | |
331 c | |
332 c %------------------------------% | |
333 c | Initial call to this routine | | |
334 c %------------------------------% | |
335 c | |
336 info = 0 | |
337 step3 = .false. | |
338 step4 = .false. | |
339 rstart = .false. | |
340 orth1 = .false. | |
341 orth2 = .false. | |
342 j = k + 1 | |
343 ipj = 1 | |
344 irj = ipj + n | |
345 ivj = irj + n | |
346 end if | |
347 c | |
348 c %-------------------------------------------------% | |
349 c | When in reverse communication mode one of: | | |
350 c | STEP3, STEP4, ORTH1, ORTH2, RSTART | | |
351 c | will be .true. when .... | | |
352 c | STEP3: return from computing OP*v_{j}. | | |
353 c | STEP4: return from computing B-norm of OP*v_{j} | | |
354 c | ORTH1: return from computing B-norm of r_{j+1} | | |
355 c | ORTH2: return from computing B-norm of | | |
356 c | correction to the residual vector. | | |
357 c | RSTART: return from OP computations needed by | | |
358 c | zgetv0. | | |
359 c %-------------------------------------------------% | |
360 c | |
361 if (step3) go to 50 | |
362 if (step4) go to 60 | |
363 if (orth1) go to 70 | |
364 if (orth2) go to 90 | |
365 if (rstart) go to 30 | |
366 c | |
367 c %-----------------------------% | |
368 c | Else this is the first step | | |
369 c %-----------------------------% | |
370 c | |
371 c %--------------------------------------------------------------% | |
372 c | | | |
373 c | A R N O L D I I T E R A T I O N L O O P | | |
374 c | | | |
375 c | Note: B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) | | |
376 c %--------------------------------------------------------------% | |
377 | |
378 1000 continue | |
379 c | |
380 if (msglvl .gt. 1) then | |
381 call ivout (logfil, 1, j, ndigit, | |
382 & '_naitr: generating Arnoldi vector number') | |
383 call dvout (logfil, 1, rnorm, ndigit, | |
384 & '_naitr: B-norm of the current residual is') | |
385 end if | |
386 c | |
387 c %---------------------------------------------------% | |
388 c | STEP 1: Check if the B norm of j-th residual | | |
389 c | vector is zero. Equivalent to determine whether | | |
390 c | an exact j-step Arnoldi factorization is present. | | |
391 c %---------------------------------------------------% | |
392 c | |
393 betaj = rnorm | |
394 if (rnorm .gt. rzero) go to 40 | |
395 c | |
396 c %---------------------------------------------------% | |
397 c | Invariant subspace found, generate a new starting | | |
398 c | vector which is orthogonal to the current Arnoldi | | |
399 c | basis and continue the iteration. | | |
400 c %---------------------------------------------------% | |
401 c | |
402 if (msglvl .gt. 0) then | |
403 call ivout (logfil, 1, j, ndigit, | |
404 & '_naitr: ****** RESTART AT STEP ******') | |
405 end if | |
406 c | |
407 c %---------------------------------------------% | |
408 c | ITRY is the loop variable that controls the | | |
409 c | maximum amount of times that a restart is | | |
410 c | attempted. NRSTRT is used by stat.h | | |
411 c %---------------------------------------------% | |
412 c | |
413 betaj = rzero | |
414 nrstrt = nrstrt + 1 | |
415 itry = 1 | |
416 20 continue | |
417 rstart = .true. | |
418 ido = 0 | |
419 30 continue | |
420 c | |
421 c %--------------------------------------% | |
422 c | If in reverse communication mode and | | |
423 c | RSTART = .true. flow returns here. | | |
424 c %--------------------------------------% | |
425 c | |
426 call zgetv0 (ido, bmat, itry, .false., n, j, v, ldv, | |
427 & resid, rnorm, ipntr, workd, ierr) | |
428 if (ido .ne. 99) go to 9000 | |
429 if (ierr .lt. 0) then | |
430 itry = itry + 1 | |
431 if (itry .le. 3) go to 20 | |
432 c | |
433 c %------------------------------------------------% | |
434 c | Give up after several restart attempts. | | |
435 c | Set INFO to the size of the invariant subspace | | |
436 c | which spans OP and exit. | | |
437 c %------------------------------------------------% | |
438 c | |
439 info = j - 1 | |
440 call arscnd (t1) | |
441 tcaitr = tcaitr + (t1 - t0) | |
442 ido = 99 | |
443 go to 9000 | |
444 end if | |
445 c | |
446 40 continue | |
447 c | |
448 c %---------------------------------------------------------% | |
449 c | STEP 2: v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm | | |
450 c | Note that p_{j} = B*r_{j-1}. In order to avoid overflow | | |
451 c | when reciprocating a small RNORM, test against lower | | |
452 c | machine bound. | | |
453 c %---------------------------------------------------------% | |
454 c | |
455 call zcopy (n, resid, 1, v(1,j), 1) | |
456 if ( rnorm .ge. unfl) then | |
457 temp1 = rone / rnorm | |
458 call zdscal (n, temp1, v(1,j), 1) | |
459 call zdscal (n, temp1, workd(ipj), 1) | |
460 else | |
461 c | |
462 c %-----------------------------------------% | |
463 c | To scale both v_{j} and p_{j} carefully | | |
464 c | use LAPACK routine zlascl | | |
465 c %-----------------------------------------% | |
466 c | |
467 call zlascl ('General', i, i, rnorm, rone, | |
468 & n, 1, v(1,j), n, infol) | |
469 call zlascl ('General', i, i, rnorm, rone, | |
470 & n, 1, workd(ipj), n, infol) | |
471 end if | |
472 c | |
473 c %------------------------------------------------------% | |
474 c | STEP 3: r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} | | |
475 c | Note that this is not quite yet r_{j}. See STEP 4 | | |
476 c %------------------------------------------------------% | |
477 c | |
478 step3 = .true. | |
479 nopx = nopx + 1 | |
480 call arscnd (t2) | |
481 call zcopy (n, v(1,j), 1, workd(ivj), 1) | |
482 ipntr(1) = ivj | |
483 ipntr(2) = irj | |
484 ipntr(3) = ipj | |
485 ido = 1 | |
486 c | |
487 c %-----------------------------------% | |
488 c | Exit in order to compute OP*v_{j} | | |
489 c %-----------------------------------% | |
490 c | |
491 go to 9000 | |
492 50 continue | |
493 c | |
494 c %----------------------------------% | |
495 c | Back from reverse communication; | | |
496 c | WORKD(IRJ:IRJ+N-1) := OP*v_{j} | | |
497 c | if step3 = .true. | | |
498 c %----------------------------------% | |
499 c | |
500 call arscnd (t3) | |
501 tmvopx = tmvopx + (t3 - t2) | |
502 | |
503 step3 = .false. | |
504 c | |
505 c %------------------------------------------% | |
506 c | Put another copy of OP*v_{j} into RESID. | | |
507 c %------------------------------------------% | |
508 c | |
509 call zcopy (n, workd(irj), 1, resid, 1) | |
510 c | |
511 c %---------------------------------------% | |
512 c | STEP 4: Finish extending the Arnoldi | | |
513 c | factorization to length j. | | |
514 c %---------------------------------------% | |
515 c | |
516 call arscnd (t2) | |
517 if (bmat .eq. 'G') then | |
518 nbx = nbx + 1 | |
519 step4 = .true. | |
520 ipntr(1) = irj | |
521 ipntr(2) = ipj | |
522 ido = 2 | |
523 c | |
524 c %-------------------------------------% | |
525 c | Exit in order to compute B*OP*v_{j} | | |
526 c %-------------------------------------% | |
527 c | |
528 go to 9000 | |
529 else if (bmat .eq. 'I') then | |
530 call zcopy (n, resid, 1, workd(ipj), 1) | |
531 end if | |
532 60 continue | |
533 c | |
534 c %----------------------------------% | |
535 c | Back from reverse communication; | | |
536 c | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} | | |
537 c | if step4 = .true. | | |
538 c %----------------------------------% | |
539 c | |
540 if (bmat .eq. 'G') then | |
541 call arscnd (t3) | |
542 tmvbx = tmvbx + (t3 - t2) | |
543 end if | |
544 c | |
545 step4 = .false. | |
546 c | |
547 c %-------------------------------------% | |
548 c | The following is needed for STEP 5. | | |
549 c | Compute the B-norm of OP*v_{j}. | | |
550 c %-------------------------------------% | |
551 c | |
552 if (bmat .eq. 'G') then | |
553 cnorm = zdotc (n, resid, 1, workd(ipj), 1) | |
554 wnorm = sqrt( dlapy2(dble(cnorm),dimag(cnorm)) ) | |
555 else if (bmat .eq. 'I') then | |
556 wnorm = dznrm2(n, resid, 1) | |
557 end if | |
558 c | |
559 c %-----------------------------------------% | |
560 c | Compute the j-th residual corresponding | | |
561 c | to the j step factorization. | | |
562 c | Use Classical Gram Schmidt and compute: | | |
563 c | w_{j} <- V_{j}^T * B * OP * v_{j} | | |
564 c | r_{j} <- OP*v_{j} - V_{j} * w_{j} | | |
565 c %-----------------------------------------% | |
566 c | |
567 c | |
568 c %------------------------------------------% | |
569 c | Compute the j Fourier coefficients w_{j} | | |
570 c | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}. | | |
571 c %------------------------------------------% | |
572 c | |
573 call zgemv ('C', n, j, one, v, ldv, workd(ipj), 1, | |
574 & zero, h(1,j), 1) | |
575 c | |
576 c %--------------------------------------% | |
577 c | Orthogonalize r_{j} against V_{j}. | | |
578 c | RESID contains OP*v_{j}. See STEP 3. | | |
579 c %--------------------------------------% | |
580 c | |
581 call zgemv ('N', n, j, -one, v, ldv, h(1,j), 1, | |
582 & one, resid, 1) | |
583 c | |
584 if (j .gt. 1) h(j,j-1) = dcmplx(betaj, rzero) | |
585 c | |
586 call arscnd (t4) | |
587 c | |
588 orth1 = .true. | |
589 c | |
590 call arscnd (t2) | |
591 if (bmat .eq. 'G') then | |
592 nbx = nbx + 1 | |
593 call zcopy (n, resid, 1, workd(irj), 1) | |
594 ipntr(1) = irj | |
595 ipntr(2) = ipj | |
596 ido = 2 | |
597 c | |
598 c %----------------------------------% | |
599 c | Exit in order to compute B*r_{j} | | |
600 c %----------------------------------% | |
601 c | |
602 go to 9000 | |
603 else if (bmat .eq. 'I') then | |
604 call zcopy (n, resid, 1, workd(ipj), 1) | |
605 end if | |
606 70 continue | |
607 c | |
608 c %---------------------------------------------------% | |
609 c | Back from reverse communication if ORTH1 = .true. | | |
610 c | WORKD(IPJ:IPJ+N-1) := B*r_{j}. | | |
611 c %---------------------------------------------------% | |
612 c | |
613 if (bmat .eq. 'G') then | |
614 call arscnd (t3) | |
615 tmvbx = tmvbx + (t3 - t2) | |
616 end if | |
617 c | |
618 orth1 = .false. | |
619 c | |
620 c %------------------------------% | |
621 c | Compute the B-norm of r_{j}. | | |
622 c %------------------------------% | |
623 c | |
624 if (bmat .eq. 'G') then | |
625 cnorm = zdotc (n, resid, 1, workd(ipj), 1) | |
626 rnorm = sqrt( dlapy2(dble(cnorm),dimag(cnorm)) ) | |
627 else if (bmat .eq. 'I') then | |
628 rnorm = dznrm2(n, resid, 1) | |
629 end if | |
630 c | |
631 c %-----------------------------------------------------------% | |
632 c | STEP 5: Re-orthogonalization / Iterative refinement phase | | |
633 c | Maximum NITER_ITREF tries. | | |
634 c | | | |
635 c | s = V_{j}^T * B * r_{j} | | |
636 c | r_{j} = r_{j} - V_{j}*s | | |
637 c | alphaj = alphaj + s_{j} | | |
638 c | | | |
639 c | The stopping criteria used for iterative refinement is | | |
640 c | discussed in Parlett's book SEP, page 107 and in Gragg & | | |
641 c | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990. | | |
642 c | Determine if we need to correct the residual. The goal is | | |
643 c | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} || | | |
644 c | The following test determines whether the sine of the | | |
645 c | angle between OP*x and the computed residual is less | | |
646 c | than or equal to 0.717. | | |
647 c %-----------------------------------------------------------% | |
648 c | |
649 if ( rnorm .gt. 0.717*wnorm ) go to 100 | |
650 c | |
651 iter = 0 | |
652 nrorth = nrorth + 1 | |
653 c | |
654 c %---------------------------------------------------% | |
655 c | Enter the Iterative refinement phase. If further | | |
656 c | refinement is necessary, loop back here. The loop | | |
657 c | variable is ITER. Perform a step of Classical | | |
658 c | Gram-Schmidt using all the Arnoldi vectors V_{j} | | |
659 c %---------------------------------------------------% | |
660 c | |
661 80 continue | |
662 c | |
663 if (msglvl .gt. 2) then | |
664 rtemp(1) = wnorm | |
665 rtemp(2) = rnorm | |
666 call dvout (logfil, 2, rtemp, ndigit, | |
667 & '_naitr: re-orthogonalization; wnorm and rnorm are') | |
668 call zvout (logfil, j, h(1,j), ndigit, | |
669 & '_naitr: j-th column of H') | |
670 end if | |
671 c | |
672 c %----------------------------------------------------% | |
673 c | Compute V_{j}^T * B * r_{j}. | | |
674 c | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). | | |
675 c %----------------------------------------------------% | |
676 c | |
677 call zgemv ('C', n, j, one, v, ldv, workd(ipj), 1, | |
678 & zero, workd(irj), 1) | |
679 c | |
680 c %---------------------------------------------% | |
681 c | Compute the correction to the residual: | | |
682 c | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). | | |
683 c | The correction to H is v(:,1:J)*H(1:J,1:J) | | |
684 c | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j. | | |
685 c %---------------------------------------------% | |
686 c | |
687 call zgemv ('N', n, j, -one, v, ldv, workd(irj), 1, | |
688 & one, resid, 1) | |
689 call zaxpy (j, one, workd(irj), 1, h(1,j), 1) | |
690 c | |
691 orth2 = .true. | |
692 call arscnd (t2) | |
693 if (bmat .eq. 'G') then | |
694 nbx = nbx + 1 | |
695 call zcopy (n, resid, 1, workd(irj), 1) | |
696 ipntr(1) = irj | |
697 ipntr(2) = ipj | |
698 ido = 2 | |
699 c | |
700 c %-----------------------------------% | |
701 c | Exit in order to compute B*r_{j}. | | |
702 c | r_{j} is the corrected residual. | | |
703 c %-----------------------------------% | |
704 c | |
705 go to 9000 | |
706 else if (bmat .eq. 'I') then | |
707 call zcopy (n, resid, 1, workd(ipj), 1) | |
708 end if | |
709 90 continue | |
710 c | |
711 c %---------------------------------------------------% | |
712 c | Back from reverse communication if ORTH2 = .true. | | |
713 c %---------------------------------------------------% | |
714 c | |
715 if (bmat .eq. 'G') then | |
716 call arscnd (t3) | |
717 tmvbx = tmvbx + (t3 - t2) | |
718 end if | |
719 c | |
720 c %-----------------------------------------------------% | |
721 c | Compute the B-norm of the corrected residual r_{j}. | | |
722 c %-----------------------------------------------------% | |
723 c | |
724 if (bmat .eq. 'G') then | |
725 cnorm = zdotc (n, resid, 1, workd(ipj), 1) | |
726 rnorm1 = sqrt( dlapy2(dble(cnorm),dimag(cnorm)) ) | |
727 else if (bmat .eq. 'I') then | |
728 rnorm1 = dznrm2(n, resid, 1) | |
729 end if | |
730 c | |
731 if (msglvl .gt. 0 .and. iter .gt. 0 ) then | |
732 call ivout (logfil, 1, j, ndigit, | |
733 & '_naitr: Iterative refinement for Arnoldi residual') | |
734 if (msglvl .gt. 2) then | |
735 rtemp(1) = rnorm | |
736 rtemp(2) = rnorm1 | |
737 call dvout (logfil, 2, rtemp, ndigit, | |
738 & '_naitr: iterative refinement ; rnorm and rnorm1 are') | |
739 end if | |
740 end if | |
741 c | |
742 c %-----------------------------------------% | |
743 c | Determine if we need to perform another | | |
744 c | step of re-orthogonalization. | | |
745 c %-----------------------------------------% | |
746 c | |
747 if ( rnorm1 .gt. 0.717*rnorm ) then | |
748 c | |
749 c %---------------------------------------% | |
750 c | No need for further refinement. | | |
751 c | The cosine of the angle between the | | |
752 c | corrected residual vector and the old | | |
753 c | residual vector is greater than 0.717 | | |
754 c | In other words the corrected residual | | |
755 c | and the old residual vector share an | | |
756 c | angle of less than arcCOS(0.717) | | |
757 c %---------------------------------------% | |
758 c | |
759 rnorm = rnorm1 | |
760 c | |
761 else | |
762 c | |
763 c %-------------------------------------------% | |
764 c | Another step of iterative refinement step | | |
765 c | is required. NITREF is used by stat.h | | |
766 c %-------------------------------------------% | |
767 c | |
768 nitref = nitref + 1 | |
769 rnorm = rnorm1 | |
770 iter = iter + 1 | |
771 if (iter .le. 1) go to 80 | |
772 c | |
773 c %-------------------------------------------------% | |
774 c | Otherwise RESID is numerically in the span of V | | |
775 c %-------------------------------------------------% | |
776 c | |
777 do 95 jj = 1, n | |
778 resid(jj) = zero | |
779 95 continue | |
780 rnorm = rzero | |
781 end if | |
782 c | |
783 c %----------------------------------------------% | |
784 c | Branch here directly if iterative refinement | | |
785 c | wasn't necessary or after at most NITER_REF | | |
786 c | steps of iterative refinement. | | |
787 c %----------------------------------------------% | |
788 c | |
789 100 continue | |
790 c | |
791 rstart = .false. | |
792 orth2 = .false. | |
793 c | |
794 call arscnd (t5) | |
795 titref = titref + (t5 - t4) | |
796 c | |
797 c %------------------------------------% | |
798 c | STEP 6: Update j = j+1; Continue | | |
799 c %------------------------------------% | |
800 c | |
801 j = j + 1 | |
802 if (j .gt. k+np) then | |
803 call arscnd (t1) | |
804 tcaitr = tcaitr + (t1 - t0) | |
805 ido = 99 | |
806 do 110 i = max(1,k), k+np-1 | |
807 c | |
808 c %--------------------------------------------% | |
809 c | Check for splitting and deflation. | | |
810 c | Use a standard test as in the QR algorithm | | |
811 c | REFERENCE: LAPACK subroutine zlahqr | | |
812 c %--------------------------------------------% | |
813 c | |
814 tst1 = dlapy2(dble(h(i,i)),dimag(h(i,i))) | |
815 & + dlapy2(dble(h(i+1,i+1)), dimag(h(i+1,i+1))) | |
816 if( tst1.eq.dble(zero) ) | |
817 & tst1 = zlanhs( '1', k+np, h, ldh, workd(n+1) ) | |
818 if( dlapy2(dble(h(i+1,i)),dimag(h(i+1,i))) .le. | |
819 & max( ulp*tst1, smlnum ) ) | |
820 & h(i+1,i) = zero | |
821 110 continue | |
822 c | |
823 if (msglvl .gt. 2) then | |
824 call zmout (logfil, k+np, k+np, h, ldh, ndigit, | |
825 & '_naitr: Final upper Hessenberg matrix H of order K+NP') | |
826 end if | |
827 c | |
828 go to 9000 | |
829 end if | |
830 c | |
831 c %--------------------------------------------------------% | |
832 c | Loop back to extend the factorization by another step. | | |
833 c %--------------------------------------------------------% | |
834 c | |
835 go to 1000 | |
836 c | |
837 c %---------------------------------------------------------------% | |
838 c | | | |
839 c | E N D O F M A I N I T E R A T I O N L O O P | | |
840 c | | | |
841 c %---------------------------------------------------------------% | |
842 c | |
843 9000 continue | |
844 return | |
845 c | |
846 c %---------------% | |
847 c | End of znaitr | | |
848 c %---------------% | |
849 c | |
850 end |