comparison libcruft/lapack/iparmq.f @ 7034:68db500cb558

[project @ 2007-10-16 18:54:19 by jwe]
author jwe
date Tue, 16 Oct 2007 18:54:23 +0000
parents
children
comparison
equal deleted inserted replaced
7033:f0142f2afdc6 7034:68db500cb558
*     IPARMQ: tunable parameters for xHSEQR and its subroutines.
      INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK )
*
*  -- LAPACK auxiliary routine (version 3.1) --
*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
*     November 2006
*
*     .. Scalar Arguments ..
      INTEGER            IHI, ILO, ISPEC, LWORK, N
      CHARACTER          NAME*( * ), OPTS*( * )
*
*  Purpose
*  =======
*
*     This program sets problem and machine dependent parameters
*     useful for xHSEQR and its subroutines.  It is called whenever
*     ILAENV is called with 12 <= ISPEC <= 16.
*
*  Arguments
*  =========
*
*     ISPEC  (input) integer scalar
*            ISPEC specifies which tunable parameter IPARMQ should
*            return.  Any value outside 12..16 makes IPARMQ
*            return -1.
*
*            ISPEC=12: (INMIN)  Matrices of order NMIN or less
*                      are sent directly to xLAHQR, the implicit
*                      double shift QR algorithm.  NMIN must be
*                      at least 11.
*
*            ISPEC=13: (INWIN)  Size of the deflation window.
*                      This is best set greater than or equal to
*                      the number of simultaneous shifts NS.
*                      Larger matrices benefit from larger
*                      deflation windows.
*
*            ISPEC=14: (INIBL)  Determines when to stop nibbling
*                      and invest in an (expensive) multi-shift
*                      QR sweep.  If the aggressive early
*                      deflation subroutine finds LD converged
*                      eigenvalues from an order NW deflation
*                      window and LD.GT.(NW*NIBBLE)/100, then the
*                      next QR sweep is skipped and early
*                      deflation is applied immediately to the
*                      remaining active diagonal block.  Setting
*                      IPARMQ(ISPEC=14) = 0 causes TTQRE to skip
*                      a multi-shift QR sweep whenever early
*                      deflation finds a converged eigenvalue.
*                      Setting IPARMQ(ISPEC=14) greater than or
*                      equal to 100 prevents TTQRE from skipping
*                      a multi-shift QR sweep.
*
*            ISPEC=15: (NSHFTS) The number of simultaneous shifts
*                      in a multi-shift QR iteration.
*
*            ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with
*                      the following meanings.
*                      0:  During the multi-shift QR sweep,
*                          xLAQR5 does not accumulate reflections
*                          and does not use matrix-matrix
*                          multiply to update the
*                          far-from-diagonal matrix entries.
*                      1:  During the multi-shift QR sweep,
*                          xLAQR5 accumulates reflections and
*                          uses matrix-matrix multiply to update
*                          the far-from-diagonal matrix entries.
*                      2:  During the multi-shift QR sweep,
*                          xLAQR5 accumulates reflections and
*                          takes advantage of 2-by-2 block
*                          structure during matrix-matrix
*                          multiplies.
*                      (If xTRMM is slower than xGEMM, then
*                      IPARMQ(ISPEC=16)=1 may be more efficient
*                      than IPARMQ(ISPEC=16)=2 despite the
*                      greater level of arithmetic work implied
*                      by the latter choice.)
*
*     NAME    (input) character string
*             Name of the calling subroutine.
*             Not referenced by this implementation.
*
*     OPTS    (input) character string
*             This is a concatenation of the string arguments to
*             TTQRE.  Not referenced by this implementation.
*
*     N       (input) integer scalar
*             N is the order of the Hessenberg matrix H.
*             Not referenced by this implementation.
*
*     ILO     (input) INTEGER
*     IHI     (input) INTEGER
*             It is assumed that H is already upper triangular
*             in rows and columns 1:ILO-1 and IHI+1:N.  Only the
*             active size NH = IHI-ILO+1 is used here, and only
*             for ISPEC = 13, 15 and 16.
*
*     LWORK   (input) integer scalar
*             The amount of workspace available.
*             Not referenced by this implementation.
*
*  Further Details
*  ===============
*
*     Little is known about how best to choose these parameters.
*     It is possible to use different values of the parameters
*     for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR.
*
*     It is probably best to choose different parameters for
*     different matrices and different parameters at different
*     times during the iteration, but this has not been
*     implemented --- yet.
*
*     The best choices of most of the parameters depend
*     in an ill-understood way on the relative execution
*     rate of xLAQR3 and xLAQR5 and on the nature of each
*     particular eigenvalue problem.  Experiment may be the
*     only practical way to determine which choices are most
*     effective.
*
*     Following is a list of default values supplied by IPARMQ.
*     These defaults may be adjusted in order to attain better
*     performance in any particular computational environment.
*
*     IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point.
*                      Default: 75.  (Must be at least 11.)
*
*     IPARMQ(ISPEC=13) Recommended deflation window size.
*                      This depends on ILO, IHI and NS, the
*                      number of simultaneous shifts returned
*                      by IPARMQ(ISPEC=15).  The default for
*                      (IHI-ILO+1).LE.500 is NS.  The default
*                      for (IHI-ILO+1).GT.500 is 3*NS/2.
*
*     IPARMQ(ISPEC=14) Nibble crossover point.  Default: 14.
*
*     IPARMQ(ISPEC=15) Number of simultaneous shifts, NS, in
*                      a multi-shift QR iteration.
*
*                      If IHI-ILO+1 is ...
*
*                      greater than     ...but less   ... the
*                      or equal to ...     than       default is
*
*                              0             30       NS =   2 (+)
*                             30             60       NS =   4 (+)
*                             60            150       NS =  10
*                            150            590       NS =  **
*                            590           3000       NS =  64
*                           3000           6000       NS = 128
*                           6000         infinity     NS = 256
*
*                  (+)  By default matrices of this order are
*                       passed to the implicit double shift
*                       routine xLAHQR.  See IPARMQ(ISPEC=12)
*                       above.  These values of NS are used only
*                       in case of a rare xLAHQR failure.
*
*                  (**) An ad-hoc function of NH = IHI-ILO+1:
*                       MAX( 10, NH / NINT( log2( NH ) ) ).
*
*                  In every case the value is finally rounded
*                  down to an even number, and is never less
*                  than 2.
*
*     IPARMQ(ISPEC=16) Select structured matrix multiply.
*                      (See ISPEC=16 above for details.)
*                      Default: 2 if NS.GE.14, otherwise 0.
*                      Because KACMIN and K22MIN are both 14,
*                      the value 1 is never returned with the
*                      defaults below.
*
*     ================================================================
*     .. Parameters ..
      INTEGER            INMIN, INWIN, INIBL, ISHFTS, IACC22
      PARAMETER          ( INMIN = 12, INWIN = 13, INIBL = 14,
     $                   ISHFTS = 15, IACC22 = 16 )
      INTEGER            NMIN, K22MIN, KACMIN, NIBBLE, KNWSWP
      PARAMETER          ( NMIN = 75, K22MIN = 14, KACMIN = 14,
     $                   NIBBLE = 14, KNWSWP = 500 )
      REAL               TWO
      PARAMETER          ( TWO = 2.0 )
*     ..
*     .. Local Scalars ..
      INTEGER            LG2NH, NH, NS
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          LOG, MAX, MOD, NINT, REAL
*     ..
*     .. Executable Statements ..
*
*     ==== NH and NS are needed only by the NSHFTS, NW and IACC22
*     .    queries, so they are computed only for those. ====
*
      IF( ( ISPEC.EQ.ISHFTS ) .OR. ( ISPEC.EQ.INWIN ) .OR.
     $    ( ISPEC.EQ.IACC22 ) ) THEN
*
*        ==== Set the number of simultaneous shifts.  The
*        .    thresholds are tested from the largest down, so
*        .    the first match is the one that applies. ====
*
         NH = IHI - ILO + 1
         IF( NH.GE.6000 ) THEN
            NS = 256
         ELSE IF( NH.GE.3000 ) THEN
            NS = 128
         ELSE IF( NH.GE.590 ) THEN
            NS = 64
         ELSE IF( NH.GE.150 ) THEN
*
*           ==== LG2NH is log2(NH) rounded to nearest integer.
*           .    NH.GE.150 here, so LG2NH is at least 7. ====
*
            LG2NH = NINT( LOG( REAL( NH ) ) / LOG( TWO ) )
            NS = MAX( 10, NH / LG2NH )
         ELSE IF( NH.GE.60 ) THEN
            NS = 10
         ELSE IF( NH.GE.30 ) THEN
            NS = 4
         ELSE
            NS = 2
         END IF
*
*        ==== Round NS down to an even number, never below 2. ====
*
         NS = MAX( 2, NS-MOD( NS, 2 ) )
      END IF
*
      IF( ISPEC.EQ.INMIN ) THEN
*
*        ==== INMIN: Matrices of order NMIN or less get sent
*        .    to xLAHQR, the classic double shift algorithm.
*        .    This must be at least 11. ====
*
         IPARMQ = NMIN
*
      ELSE IF( ISPEC.EQ.INIBL ) THEN
*
*        ==== INIBL: skip a multi-shift QR iteration whenever
*        .    aggressive early deflation finds more than
*        .    (NIBBLE*(window size)/100) deflations. ====
*
         IPARMQ = NIBBLE
*
      ELSE IF( ISPEC.EQ.ISHFTS ) THEN
*
*        ==== NSHFTS: The number of simultaneous shifts ====
*
         IPARMQ = NS
*
      ELSE IF( ISPEC.EQ.INWIN ) THEN
*
*        ==== NW: deflation window size.  Active blocks larger
*        .    than KNWSWP get a window half as large again. ====
*
         IF( NH.LE.KNWSWP ) THEN
            IPARMQ = NS
         ELSE
            IPARMQ = 3*NS / 2
         END IF
*
      ELSE IF( ISPEC.EQ.IACC22 ) THEN
*
*        ==== IACC22: Whether to accumulate reflections
*        .    before updating the far-from-diagonal elements
*        .    and whether to use 2-by-2 block structure while
*        .    doing it.  A small amount of work could be saved
*        .    by making this choice dependent also upon
*        .    NH=IHI-ILO+1.
*        .
*        .    The second test overrides the first.  While
*        .    KACMIN equals K22MIN the result is 0 or 2. ====
*
         IPARMQ = 0
         IF( NS.GE.KACMIN )
     $      IPARMQ = 1
         IF( NS.GE.K22MIN )
     $      IPARMQ = 2
*
      ELSE
*
*        ==== Invalid value of ISPEC ====
*
         IPARMQ = -1
*
      END IF
*
*     ==== End of IPARMQ ====
*
      END