diff libcruft/arpack/src/znaitr.f @ 12274:9f5d2ef078e8 release-3-4-x

import ARPACK sources to libcruft from Debian package libarpack2 2.1+parpack96.dfsg-3+b1
author John W. Eaton <jwe@octave.org>
date Fri, 28 Jan 2011 14:04:33 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcruft/arpack/src/znaitr.f	Fri Jan 28 14:04:33 2011 -0500
@@ -0,0 +1,850 @@
+c\BeginDoc
+c
+c\Name: znaitr
+c
+c\Description: 
+c  Reverse communication interface for applying NP additional steps to 
+c  a K step nonsymmetric Arnoldi factorization.
+c
+c  Input:  OP*V_{k}  -  V_{k}*H = r_{k}*e_{k}^T
+c
+c          with (V_{k}^T)*B*V_{k} = I, (V_{k}^T)*B*r_{k} = 0.
+c
+c  Output: OP*V_{k+p}  -  V_{k+p}*H = r_{k+p}*e_{k+p}^T
+c
+c          with (V_{k+p}^T)*B*V_{k+p} = I, (V_{k+p}^T)*B*r_{k+p} = 0.
+c
+c  where OP and B are as in znaupd.  The B-norm of r_{k+p} is also
+c  computed and returned.
+c
+c\Usage:
+c  call znaitr
+c     ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH, 
+c       IPNTR, WORKD, INFO )
+c
+c\Arguments
+c  IDO     Integer.  (INPUT/OUTPUT)
+c          Reverse communication flag.
+c          -------------------------------------------------------------
+c          IDO =  0: first call to the reverse communication interface
+c          IDO = -1: compute  Y = OP * X  where
+c                    IPNTR(1) is the pointer into WORK for X,
+c                    IPNTR(2) is the pointer into WORK for Y.
+c                    This is for the restart phase to force the new
+c                    starting vector into the range of OP.
+c          IDO =  1: compute  Y = OP * X  where
+c                    IPNTR(1) is the pointer into WORK for X,
+c                    IPNTR(2) is the pointer into WORK for Y,
+c                    IPNTR(3) is the pointer into WORK for B * X.
+c          IDO =  2: compute  Y = B * X  where
+c                    IPNTR(1) is the pointer into WORK for X,
+c                    IPNTR(2) is the pointer into WORK for Y.
+c          IDO = 99: done
+c          -------------------------------------------------------------
+c          When the routine is used in the "shift-and-invert" mode, the
+c          vector B * Q is already available and do not need to be
+c          recomputed in forming OP * Q.
+c
+c  BMAT    Character*1.  (INPUT)
+c          BMAT specifies the type of the matrix B that defines the
+c          semi-inner product for the operator OP.  See znaupd.
+c          B = 'I' -> standard eigenvalue problem A*x = lambda*x
+c          B = 'G' -> generalized eigenvalue problem A*x = lambda*M**x
+c
+c  N       Integer.  (INPUT)
+c          Dimension of the eigenproblem.
+c
+c  K       Integer.  (INPUT)
+c          Current size of V and H.
+c
+c  NP      Integer.  (INPUT)
+c          Number of additional Arnoldi steps to take.
+c
+c  NB      Integer.  (INPUT)
+c          Blocksize to be used in the recurrence.          
+c          Only work for NB = 1 right now.  The goal is to have a 
+c          program that implement both the block and non-block method.
+c
+c  RESID   Complex*16 array of length N.  (INPUT/OUTPUT)
+c          On INPUT:  RESID contains the residual vector r_{k}.
+c          On OUTPUT: RESID contains the residual vector r_{k+p}.
+c
+c  RNORM   Double precision scalar.  (INPUT/OUTPUT)
+c          B-norm of the starting residual on input.
+c          B-norm of the updated residual r_{k+p} on output.
+c
+c  V       Complex*16 N by K+NP array.  (INPUT/OUTPUT)
+c          On INPUT:  V contains the Arnoldi vectors in the first K 
+c          columns.
+c          On OUTPUT: V contains the new NP Arnoldi vectors in the next
+c          NP columns.  The first K columns are unchanged.
+c
+c  LDV     Integer.  (INPUT)
+c          Leading dimension of V exactly as declared in the calling 
+c          program.
+c
+c  H       Complex*16 (K+NP) by (K+NP) array.  (INPUT/OUTPUT)
+c          H is used to store the generated upper Hessenberg matrix.
+c
+c  LDH     Integer.  (INPUT)
+c          Leading dimension of H exactly as declared in the calling 
+c          program.
+c
+c  IPNTR   Integer array of length 3.  (OUTPUT)
+c          Pointer to mark the starting locations in the WORK for 
+c          vectors used by the Arnoldi iteration.
+c          -------------------------------------------------------------
+c          IPNTR(1): pointer to the current operand vector X.
+c          IPNTR(2): pointer to the current result vector Y.
+c          IPNTR(3): pointer to the vector B * X when used in the 
+c                    shift-and-invert mode.  X is the current operand.
+c          -------------------------------------------------------------
+c          
+c  WORKD   Complex*16 work array of length 3*N.  (REVERSE COMMUNICATION)
+c          Distributed array to be used in the basic Arnoldi iteration
+c          for reverse communication.  The calling program should not 
+c          use WORKD as temporary workspace during the iteration !!!!!!
+c          On input, WORKD(1:N) = B*RESID and is used to save some 
+c          computation at the first step.
+c
+c  INFO    Integer.  (OUTPUT)
+c          = 0: Normal exit.
+c          > 0: Size of the spanning invariant subspace of OP found.
+c
+c\EndDoc
+c
+c-----------------------------------------------------------------------
+c
+c\BeginLib
+c
+c\Local variables:
+c     xxxxxx  Complex*16
+c
+c\References:
+c  1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
+c     a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
+c     pp 357-385.
+c  2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly 
+c     Restarted Arnoldi Iteration", Rice University Technical Report
+c     TR95-13, Department of Computational and Applied Mathematics.
+c
+c\Routines called:
+c     zgetv0  ARPACK routine to generate the initial vector.
+c     ivout   ARPACK utility routine that prints integers.
+c     arscnd  ARPACK utility routine for timing.
+c     zmout   ARPACK utility routine that prints matrices
+c     zvout   ARPACK utility routine that prints vectors.
+c     zlanhs  LAPACK routine that computes various norms of a matrix.
+c     zlascl  LAPACK routine for careful scaling of a matrix.
+c     dlabad  LAPACK routine for defining the underflow and overflow
+c             limits.
+c     dlamch  LAPACK routine that determines machine constants.
+c     dlapy2  LAPACK routine to compute sqrt(x**2+y**2) carefully.
+c     zgemv   Level 2 BLAS routine for matrix vector multiplication.
+c     zaxpy   Level 1 BLAS that computes a vector triad.
+c     zcopy   Level 1 BLAS that copies one vector to another .
+c     zdotc   Level 1 BLAS that computes the scalar product of two vectors. 
+c     zscal   Level 1 BLAS that scales a vector.
+c     zdscal  Level 1 BLAS that scales a complex vector by a real number. 
+c     dznrm2  Level 1 BLAS that computes the norm of a vector.
+c
+c\Author
+c     Danny Sorensen               Phuong Vu
+c     Richard Lehoucq              CRPC / Rice University
+c     Dept. of Computational &     Houston, Texas 
+c     Applied Mathematics 
+c     Rice University           
+c     Houston, Texas 
+c 
+c\SCCS Information: @(#)
+c FILE: naitr.F   SID: 2.3   DATE OF SID: 8/27/96   RELEASE: 2
+c
+c\Remarks
+c  The algorithm implemented is:
+c  
+c  restart = .false.
+c  Given V_{k} = [v_{1}, ..., v_{k}], r_{k}; 
+c  r_{k} contains the initial residual vector even for k = 0;
+c  Also assume that rnorm = || B*r_{k} || and B*r_{k} are already 
+c  computed by the calling program.
+c
+c  betaj = rnorm ; p_{k+1} = B*r_{k} ;
+c  For  j = k+1, ..., k+np  Do
+c     1) if ( betaj < tol ) stop or restart depending on j.
+c        ( At present tol is zero )
+c        if ( restart ) generate a new starting vector.
+c     2) v_{j} = r(j-1)/betaj;  V_{j} = [V_{j-1}, v_{j}];  
+c        p_{j} = p_{j}/betaj
+c     3) r_{j} = OP*v_{j} where OP is defined as in znaupd
+c        For shift-invert mode p_{j} = B*v_{j} is already available.
+c        wnorm = || OP*v_{j} ||
+c     4) Compute the j-th step residual vector.
+c        w_{j} =  V_{j}^T * B * OP * v_{j}
+c        r_{j} =  OP*v_{j} - V_{j} * w_{j}
+c        H(:,j) = w_{j};
+c        H(j,j-1) = rnorm
+c        rnorm = || r_(j) ||
+c        If (rnorm > 0.717*wnorm) accept step and go back to 1)
+c     5) Re-orthogonalization step:
+c        s = V_{j}'*B*r_{j}
+c        r_{j} = r_{j} - V_{j}*s;  rnorm1 = || r_{j} ||
+c        alphaj = alphaj + s_{j};   
+c     6) Iterative refinement step:
+c        If (rnorm1 > 0.717*rnorm) then
+c           rnorm = rnorm1
+c           accept step and go back to 1)
+c        Else
+c           rnorm = rnorm1
+c           If this is the first time in step 6), go to 5)
+c           Else r_{j} lies in the span of V_{j} numerically.
+c              Set r_{j} = 0 and rnorm = 0; go to 1)
+c        EndIf 
+c  End Do
+c
+c\EndLib
+c
+c-----------------------------------------------------------------------
+c
+      subroutine znaitr
+     &   (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh, 
+     &    ipntr, workd, info)
+c
+c     %----------------------------------------------------%
+c     | Include files for debugging and timing information |
+c     %----------------------------------------------------%
+c
+      include   'debug.h'
+      include   'stat.h'
+c
+c     %------------------%
+c     | Scalar Arguments |
+c     %------------------%
+c
+      character  bmat*1
+      integer    ido, info, k, ldh, ldv, n, nb, np
+      Double precision
+     &           rnorm
+c
+c     %-----------------%
+c     | Array Arguments |
+c     %-----------------%
+c
+      integer    ipntr(3)
+      Complex*16
+     &           h(ldh,k+np), resid(n), v(ldv,k+np), workd(3*n)
+c
+c     %------------%
+c     | Parameters |
+c     %------------%
+c
+      Complex*16
+     &           one, zero
+      Double precision
+     &           rone, rzero
+      parameter (one = (1.0D+0, 0.0D+0), zero = (0.0D+0, 0.0D+0), 
+     &           rone = 1.0D+0, rzero = 0.0D+0)
+c
+c     %--------------%
+c     | Local Arrays |
+c     %--------------%
+c
+      Double precision
+     &           rtemp(2)
+c
+c     %---------------%
+c     | Local Scalars |
+c     %---------------%
+c
+      logical    first, orth1, orth2, rstart, step3, step4
+      integer    ierr, i, infol, ipj, irj, ivj, iter, itry, j, msglvl,
+     &           jj
+      Double precision            
+     &           ovfl, smlnum, tst1, ulp, unfl, betaj,
+     &           temp1, rnorm1, wnorm
+      Complex*16
+     &           cnorm
+c
+      save       first, orth1, orth2, rstart, step3, step4,
+     &           ierr, ipj, irj, ivj, iter, itry, j, msglvl, ovfl,
+     &           betaj, rnorm1, smlnum, ulp, unfl, wnorm
+c
+c     %----------------------%
+c     | External Subroutines |
+c     %----------------------%
+c
+      external   zaxpy, zcopy, zscal, zdscal, zgemv, zgetv0, 
+     &           dlabad, zvout, zmout, ivout, arscnd
+c
+c     %--------------------%
+c     | External Functions |
+c     %--------------------%
+c
+      Complex*16
+     &           zdotc 
+      Double precision            
+     &           dlamch,  dznrm2, zlanhs, dlapy2
+      external   zdotc, dznrm2, zlanhs, dlamch, dlapy2
+c
+c     %---------------------%
+c     | Intrinsic Functions |
+c     %---------------------%
+c
+      intrinsic  dimag, dble, max, sqrt 
+c
+c     %-----------------%
+c     | Data statements |
+c     %-----------------%
+c
+      data       first / .true. /
+c
+c     %-----------------------%
+c     | Executable Statements |
+c     %-----------------------%
+c
+      if (first) then
+c
+c        %-----------------------------------------%
+c        | Set machine-dependent constants for the |
+c        | the splitting and deflation criterion.  |
+c        | If norm(H) <= sqrt(OVFL),               |
+c        | overflow should not occur.              |
+c        | REFERENCE: LAPACK subroutine zlahqr     |
+c        %-----------------------------------------%
+c
+         unfl = dlamch( 'safe minimum' )
+         ovfl = dble(one / unfl)
+         call dlabad( unfl, ovfl )
+         ulp = dlamch( 'precision' )
+         smlnum = unfl*( n / ulp )
+         first = .false.
+      end if
+c
+      if (ido .eq. 0) then
+c 
+c        %-------------------------------%
+c        | Initialize timing statistics  |
+c        | & message level for debugging |
+c        %-------------------------------%
+c
+         call arscnd (t0)
+         msglvl = mcaitr
+c 
+c        %------------------------------%
+c        | Initial call to this routine |
+c        %------------------------------%
+c
+         info   = 0
+         step3  = .false.
+         step4  = .false.
+         rstart = .false.
+         orth1  = .false.
+         orth2  = .false.
+         j      = k + 1
+         ipj    = 1
+         irj    = ipj   + n
+         ivj    = irj   + n
+      end if
+c 
+c     %-------------------------------------------------%
+c     | When in reverse communication mode one of:      |
+c     | STEP3, STEP4, ORTH1, ORTH2, RSTART              |
+c     | will be .true. when ....                        |
+c     | STEP3: return from computing OP*v_{j}.          |
+c     | STEP4: return from computing B-norm of OP*v_{j} |
+c     | ORTH1: return from computing B-norm of r_{j+1}  |
+c     | ORTH2: return from computing B-norm of          |
+c     |        correction to the residual vector.       |
+c     | RSTART: return from OP computations needed by   |
+c     |         zgetv0.                                 |
+c     %-------------------------------------------------%
+c
+      if (step3)  go to 50
+      if (step4)  go to 60
+      if (orth1)  go to 70
+      if (orth2)  go to 90
+      if (rstart) go to 30
+c
+c     %-----------------------------%
+c     | Else this is the first step |
+c     %-----------------------------%
+c
+c     %--------------------------------------------------------------%
+c     |                                                              |
+c     |        A R N O L D I     I T E R A T I O N     L O O P       |
+c     |                                                              |
+c     | Note:  B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) |
+c     %--------------------------------------------------------------%
+ 
+ 1000 continue
+c
+         if (msglvl .gt. 1) then
+            call ivout (logfil, 1, j, ndigit, 
+     &                  '_naitr: generating Arnoldi vector number')
+            call dvout (logfil, 1, rnorm, ndigit, 
+     &                  '_naitr: B-norm of the current residual is')
+         end if
+c 
+c        %---------------------------------------------------%
+c        | STEP 1: Check if the B norm of j-th residual      |
+c        | vector is zero. Equivalent to determine whether   |
+c        | an exact j-step Arnoldi factorization is present. |
+c        %---------------------------------------------------%
+c
+         betaj = rnorm
+         if (rnorm .gt. rzero) go to 40
+c
+c           %---------------------------------------------------%
+c           | Invariant subspace found, generate a new starting |
+c           | vector which is orthogonal to the current Arnoldi |
+c           | basis and continue the iteration.                 |
+c           %---------------------------------------------------%
+c
+            if (msglvl .gt. 0) then
+               call ivout (logfil, 1, j, ndigit,
+     &                     '_naitr: ****** RESTART AT STEP ******')
+            end if
+c 
+c           %---------------------------------------------%
+c           | ITRY is the loop variable that controls the |
+c           | maximum amount of times that a restart is   |
+c           | attempted. NRSTRT is used by stat.h         |
+c           %---------------------------------------------%
+c 
+            betaj  = rzero
+            nrstrt = nrstrt + 1
+            itry   = 1
+   20       continue
+            rstart = .true.
+            ido    = 0
+   30       continue
+c
+c           %--------------------------------------%
+c           | If in reverse communication mode and |
+c           | RSTART = .true. flow returns here.   |
+c           %--------------------------------------%
+c
+            call zgetv0 (ido, bmat, itry, .false., n, j, v, ldv, 
+     &                   resid, rnorm, ipntr, workd, ierr)
+            if (ido .ne. 99) go to 9000
+            if (ierr .lt. 0) then
+               itry = itry + 1
+               if (itry .le. 3) go to 20
+c
+c              %------------------------------------------------%
+c              | Give up after several restart attempts.        |
+c              | Set INFO to the size of the invariant subspace |
+c              | which spans OP and exit.                       |
+c              %------------------------------------------------%
+c
+               info = j - 1
+               call arscnd (t1)
+               tcaitr = tcaitr + (t1 - t0)
+               ido = 99
+               go to 9000
+            end if
+c 
+   40    continue
+c
+c        %---------------------------------------------------------%
+c        | STEP 2:  v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm  |
+c        | Note that p_{j} = B*r_{j-1}. In order to avoid overflow |
+c        | when reciprocating a small RNORM, test against lower    |
+c        | machine bound.                                          |
+c        %---------------------------------------------------------%
+c
+         call zcopy (n, resid, 1, v(1,j), 1)
+         if ( rnorm .ge. unfl) then
+             temp1 = rone / rnorm
+             call zdscal (n, temp1, v(1,j), 1)
+             call zdscal (n, temp1, workd(ipj), 1)
+         else
+c
+c            %-----------------------------------------%
+c            | To scale both v_{j} and p_{j} carefully |
+c            | use LAPACK routine zlascl               |
+c            %-----------------------------------------%
+c
+             call zlascl ('General', i, i, rnorm, rone,
+     &                    n, 1, v(1,j), n, infol)
+             call zlascl ('General', i, i, rnorm, rone,  
+     &                    n, 1, workd(ipj), n, infol)
+         end if
+c
+c        %------------------------------------------------------%
+c        | STEP 3:  r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} |
+c        | Note that this is not quite yet r_{j}. See STEP 4    |
+c        %------------------------------------------------------%
+c
+         step3 = .true.
+         nopx  = nopx + 1
+         call arscnd (t2)
+         call zcopy (n, v(1,j), 1, workd(ivj), 1)
+         ipntr(1) = ivj
+         ipntr(2) = irj
+         ipntr(3) = ipj
+         ido = 1
+c 
+c        %-----------------------------------%
+c        | Exit in order to compute OP*v_{j} |
+c        %-----------------------------------%
+c 
+         go to 9000 
+   50    continue
+c 
+c        %----------------------------------%
+c        | Back from reverse communication; |
+c        | WORKD(IRJ:IRJ+N-1) := OP*v_{j}   |
+c        | if step3 = .true.                |
+c        %----------------------------------%
+c
+         call arscnd (t3)
+         tmvopx = tmvopx + (t3 - t2)
+ 
+         step3 = .false.
+c
+c        %------------------------------------------%
+c        | Put another copy of OP*v_{j} into RESID. |
+c        %------------------------------------------%
+c
+         call zcopy (n, workd(irj), 1, resid, 1)
+c 
+c        %---------------------------------------%
+c        | STEP 4:  Finish extending the Arnoldi |
+c        |          factorization to length j.   |
+c        %---------------------------------------%
+c
+         call arscnd (t2)
+         if (bmat .eq. 'G') then
+            nbx = nbx + 1
+            step4 = .true.
+            ipntr(1) = irj
+            ipntr(2) = ipj
+            ido = 2
+c 
+c           %-------------------------------------%
+c           | Exit in order to compute B*OP*v_{j} |
+c           %-------------------------------------%
+c 
+            go to 9000
+         else if (bmat .eq. 'I') then
+            call zcopy (n, resid, 1, workd(ipj), 1)
+         end if
+   60    continue
+c 
+c        %----------------------------------%
+c        | Back from reverse communication; |
+c        | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} |
+c        | if step4 = .true.                |
+c        %----------------------------------%
+c
+         if (bmat .eq. 'G') then
+            call arscnd (t3)
+            tmvbx = tmvbx + (t3 - t2)
+         end if
+c 
+         step4 = .false.
+c
+c        %-------------------------------------%
+c        | The following is needed for STEP 5. |
+c        | Compute the B-norm of OP*v_{j}.     |
+c        %-------------------------------------%
+c
+         if (bmat .eq. 'G') then  
+             cnorm = zdotc (n, resid, 1, workd(ipj), 1)
+             wnorm = sqrt( dlapy2(dble(cnorm),dimag(cnorm)) )
+         else if (bmat .eq. 'I') then
+             wnorm = dznrm2(n, resid, 1)
+         end if
+c
+c        %-----------------------------------------%
+c        | Compute the j-th residual corresponding |
+c        | to the j step factorization.            |
+c        | Use Classical Gram Schmidt and compute: |
+c        | w_{j} <-  V_{j}^T * B * OP * v_{j}      |
+c        | r_{j} <-  OP*v_{j} - V_{j} * w_{j}      |
+c        %-----------------------------------------%
+c
+c
+c        %------------------------------------------%
+c        | Compute the j Fourier coefficients w_{j} |
+c        | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}.  |
+c        %------------------------------------------%
+c 
+         call zgemv ('C', n, j, one, v, ldv, workd(ipj), 1,
+     &               zero, h(1,j), 1)
+c
+c        %--------------------------------------%
+c        | Orthogonalize r_{j} against V_{j}.   |
+c        | RESID contains OP*v_{j}. See STEP 3. | 
+c        %--------------------------------------%
+c
+         call zgemv ('N', n, j, -one, v, ldv, h(1,j), 1,
+     &               one, resid, 1)
+c
+         if (j .gt. 1) h(j,j-1) = dcmplx(betaj, rzero)
+c
+         call arscnd (t4)
+c 
+         orth1 = .true.
+c 
+         call arscnd (t2)
+         if (bmat .eq. 'G') then
+            nbx = nbx + 1
+            call zcopy (n, resid, 1, workd(irj), 1)
+            ipntr(1) = irj
+            ipntr(2) = ipj
+            ido = 2
+c 
+c           %----------------------------------%
+c           | Exit in order to compute B*r_{j} |
+c           %----------------------------------%
+c 
+            go to 9000
+         else if (bmat .eq. 'I') then
+            call zcopy (n, resid, 1, workd(ipj), 1)
+         end if 
+   70    continue
+c 
+c        %---------------------------------------------------%
+c        | Back from reverse communication if ORTH1 = .true. |
+c        | WORKD(IPJ:IPJ+N-1) := B*r_{j}.                    |
+c        %---------------------------------------------------%
+c
+         if (bmat .eq. 'G') then
+            call arscnd (t3)
+            tmvbx = tmvbx + (t3 - t2)
+         end if
+c 
+         orth1 = .false.
+c
+c        %------------------------------%
+c        | Compute the B-norm of r_{j}. |
+c        %------------------------------%
+c
+         if (bmat .eq. 'G') then         
+            cnorm = zdotc (n, resid, 1, workd(ipj), 1)
+            rnorm = sqrt( dlapy2(dble(cnorm),dimag(cnorm)) )
+         else if (bmat .eq. 'I') then
+            rnorm = dznrm2(n, resid, 1)
+         end if
+c 
+c        %-----------------------------------------------------------%
+c        | STEP 5: Re-orthogonalization / Iterative refinement phase |
+c        | Maximum NITER_ITREF tries.                                |
+c        |                                                           |
+c        |          s      = V_{j}^T * B * r_{j}                     |
+c        |          r_{j}  = r_{j} - V_{j}*s                         |
+c        |          alphaj = alphaj + s_{j}                          |
+c        |                                                           |
+c        | The stopping criteria used for iterative refinement is    |
+c        | discussed in Parlett's book SEP, page 107 and in Gragg &  |
+c        | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990.         |
+c        | Determine if we need to correct the residual. The goal is |
+c        | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} ||  |
+c        | The following test determines whether the sine of the     |
+c        | angle between  OP*x and the computed residual is less     |
+c        | than or equal to 0.717.                                   |
+c        %-----------------------------------------------------------%
+c
+         if ( rnorm .gt. 0.717*wnorm ) go to 100
+c
+         iter  = 0
+         nrorth = nrorth + 1
+c 
+c        %---------------------------------------------------%
+c        | Enter the Iterative refinement phase. If further  |
+c        | refinement is necessary, loop back here. The loop |
+c        | variable is ITER. Perform a step of Classical     |
+c        | Gram-Schmidt using all the Arnoldi vectors V_{j}  |
+c        %---------------------------------------------------%
+c 
+   80    continue
+c
+         if (msglvl .gt. 2) then
+            rtemp(1) = wnorm
+            rtemp(2) = rnorm
+            call dvout (logfil, 2, rtemp, ndigit, 
+     &      '_naitr: re-orthogonalization; wnorm and rnorm are')
+            call zvout (logfil, j, h(1,j), ndigit,
+     &                  '_naitr: j-th column of H')
+         end if
+c
+c        %----------------------------------------------------%
+c        | Compute V_{j}^T * B * r_{j}.                       |
+c        | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). |
+c        %----------------------------------------------------%
+c
+         call zgemv ('C', n, j, one, v, ldv, workd(ipj), 1, 
+     &               zero, workd(irj), 1)
+c
+c        %---------------------------------------------%
+c        | Compute the correction to the residual:     |
+c        | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). |
+c        | The correction to H is v(:,1:J)*H(1:J,1:J)  |
+c        | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j.         |
+c        %---------------------------------------------%
+c
+         call zgemv ('N', n, j, -one, v, ldv, workd(irj), 1, 
+     &               one, resid, 1)
+         call zaxpy (j, one, workd(irj), 1, h(1,j), 1)
+c 
+         orth2 = .true.
+         call arscnd (t2)
+         if (bmat .eq. 'G') then
+            nbx = nbx + 1
+            call zcopy (n, resid, 1, workd(irj), 1)
+            ipntr(1) = irj
+            ipntr(2) = ipj
+            ido = 2
+c 
+c           %-----------------------------------%
+c           | Exit in order to compute B*r_{j}. |
+c           | r_{j} is the corrected residual.  |
+c           %-----------------------------------%
+c 
+            go to 9000
+         else if (bmat .eq. 'I') then
+            call zcopy (n, resid, 1, workd(ipj), 1)
+         end if 
+   90    continue
+c
+c        %---------------------------------------------------%
+c        | Back from reverse communication if ORTH2 = .true. |
+c        %---------------------------------------------------%
+c
+         if (bmat .eq. 'G') then
+            call arscnd (t3)
+            tmvbx = tmvbx + (t3 - t2)
+         end if 
+c
+c        %-----------------------------------------------------%
+c        | Compute the B-norm of the corrected residual r_{j}. |
+c        %-----------------------------------------------------%
+c 
+         if (bmat .eq. 'G') then         
+             cnorm  = zdotc (n, resid, 1, workd(ipj), 1)
+             rnorm1 = sqrt( dlapy2(dble(cnorm),dimag(cnorm)) )
+         else if (bmat .eq. 'I') then
+             rnorm1 = dznrm2(n, resid, 1)
+         end if
+c 
+         if (msglvl .gt. 0 .and. iter .gt. 0 ) then
+            call ivout (logfil, 1, j, ndigit,
+     &           '_naitr: Iterative refinement for Arnoldi residual')
+            if (msglvl .gt. 2) then
+                rtemp(1) = rnorm
+                rtemp(2) = rnorm1
+                call dvout (logfil, 2, rtemp, ndigit,
+     &           '_naitr: iterative refinement ; rnorm and rnorm1 are')
+            end if
+         end if
+c
+c        %-----------------------------------------%
+c        | Determine if we need to perform another |
+c        | step of re-orthogonalization.           |
+c        %-----------------------------------------%
+c
+         if ( rnorm1 .gt. 0.717*rnorm ) then
+c
+c           %---------------------------------------%
+c           | No need for further refinement.       |
+c           | The cosine of the angle between the   |
+c           | corrected residual vector and the old |
+c           | residual vector is greater than 0.717 |
+c           | In other words the corrected residual |
+c           | and the old residual vector share an  |
+c           | angle of less than arcCOS(0.717)      |
+c           %---------------------------------------%
+c
+            rnorm = rnorm1
+c 
+         else
+c
+c           %-------------------------------------------%
+c           | Another step of iterative refinement step |
+c           | is required. NITREF is used by stat.h     |
+c           %-------------------------------------------%
+c
+            nitref = nitref + 1
+            rnorm  = rnorm1
+            iter   = iter + 1
+            if (iter .le. 1) go to 80
+c
+c           %-------------------------------------------------%
+c           | Otherwise RESID is numerically in the span of V |
+c           %-------------------------------------------------%
+c
+            do 95 jj = 1, n
+               resid(jj) = zero
+  95        continue 
+            rnorm = rzero
+         end if
+c 
+c        %----------------------------------------------%
+c        | Branch here directly if iterative refinement |
+c        | wasn't necessary or after at most NITER_REF  |
+c        | steps of iterative refinement.               |
+c        %----------------------------------------------%
+c 
+  100    continue
+c 
+         rstart = .false.
+         orth2  = .false.
+c 
+         call arscnd (t5)
+         titref = titref + (t5 - t4)
+c 
+c        %------------------------------------%
+c        | STEP 6: Update  j = j+1;  Continue |
+c        %------------------------------------%
+c
+         j = j + 1
+         if (j .gt. k+np) then
+            call arscnd (t1)
+            tcaitr = tcaitr + (t1 - t0)
+            ido = 99
+            do 110 i = max(1,k), k+np-1
+c     
+c              %--------------------------------------------%
+c              | Check for splitting and deflation.         |
+c              | Use a standard test as in the QR algorithm |
+c              | REFERENCE: LAPACK subroutine zlahqr        |
+c              %--------------------------------------------%
+c     
+               tst1 = dlapy2(dble(h(i,i)),dimag(h(i,i)))
+     &              + dlapy2(dble(h(i+1,i+1)), dimag(h(i+1,i+1)))
+               if( tst1.eq.dble(zero) )
+     &              tst1 = zlanhs( '1', k+np, h, ldh, workd(n+1) )
+               if( dlapy2(dble(h(i+1,i)),dimag(h(i+1,i))) .le. 
+     &                    max( ulp*tst1, smlnum ) ) 
+     &             h(i+1,i) = zero
+ 110        continue
+c     
+            if (msglvl .gt. 2) then
+               call zmout (logfil, k+np, k+np, h, ldh, ndigit, 
+     &          '_naitr: Final upper Hessenberg matrix H of order K+NP')
+            end if
+c     
+            go to 9000
+         end if
+c
+c        %--------------------------------------------------------%
+c        | Loop back to extend the factorization by another step. |
+c        %--------------------------------------------------------%
+c
+      go to 1000
+c 
+c     %---------------------------------------------------------------%
+c     |                                                               |
+c     |  E N D     O F     M A I N     I T E R A T I O N     L O O P  |
+c     |                                                               |
+c     %---------------------------------------------------------------%
+c
+ 9000 continue
+      return
+c
+c     %---------------%
+c     | End of znaitr |
+c     %---------------%
+c
+      end