comparison libcruft/lapack/zgbtrf.f @ 5164:57077d0ddc8e

[project @ 2005-02-25 19:55:24 by jwe]
author jwe
date Fri, 25 Feb 2005 19:55:28 +0000
parents
children 68db500cb558
comparison
equal deleted inserted replaced
5163:9f3299378193 5164:57077d0ddc8e
1 SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
2 *
3 * -- LAPACK routine (version 3.0) --
4 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
5 * Courant Institute, Argonne National Lab, and Rice University
6 * September 30, 1994
7 *
8 * .. Scalar Arguments ..
9 INTEGER INFO, KL, KU, LDAB, M, N
10 * ..
11 * .. Array Arguments ..
12 INTEGER IPIV( * )
13 COMPLEX*16 AB( LDAB, * )
14 * ..
15 *
16 * Purpose
17 * =======
18 *
19 * ZGBTRF computes an LU factorization of a complex m-by-n band matrix A
20 * using partial pivoting with row interchanges.
21 *
22 * This is the blocked version of the algorithm, calling Level 3 BLAS.
23 *
24 * Arguments
25 * =========
26 *
27 * M (input) INTEGER
28 * The number of rows of the matrix A. M >= 0.
29 *
30 * N (input) INTEGER
31 * The number of columns of the matrix A. N >= 0.
32 *
33 * KL (input) INTEGER
34 * The number of subdiagonals within the band of A. KL >= 0.
35 *
36 * KU (input) INTEGER
37 * The number of superdiagonals within the band of A. KU >= 0.
38 *
39 * AB (input/output) COMPLEX*16 array, dimension (LDAB,N)
40 * On entry, the matrix A in band storage, in rows KL+1 to
41 * 2*KL+KU+1; rows 1 to KL of the array need not be set.
42 * The j-th column of A is stored in the j-th column of the
43 * array AB as follows:
44 * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
45 *
46 * On exit, details of the factorization: U is stored as an
47 * upper triangular band matrix with KL+KU superdiagonals in
48 * rows 1 to KL+KU+1, and the multipliers used during the
49 * factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
50 * See below for further details.
51 *
52 * LDAB (input) INTEGER
53 * The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
54 *
55 * IPIV (output) INTEGER array, dimension (min(M,N))
56 * The pivot indices; for 1 <= i <= min(M,N), row i of the
57 * matrix was interchanged with row IPIV(i).
58 *
59 * INFO (output) INTEGER
60 * = 0: successful exit
61 * < 0: if INFO = -i, the i-th argument had an illegal value
62 * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
63 * has been completed, but the factor U is exactly
64 * singular, and division by zero will occur if it is used
65 * to solve a system of equations.
66 *
67 * Further Details
68 * ===============
69 *
70 * The band storage scheme is illustrated by the following example, when
71 * M = N = 6, KL = 2, KU = 1:
72 *
73 * On entry: On exit:
74 *
75 * * * * + + + * * * u14 u25 u36
76 * * * + + + + * * u13 u24 u35 u46
77 * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
78 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
79 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
80 * a31 a42 a53 a64 * * m31 m42 m53 m64 * *
81 *
82 * Array elements marked * are not used by the routine; elements marked
83 * + need not be set on entry, but are required by the routine to store
84 * elements of U because of fill-in resulting from the row interchanges.
85 *
86 * =====================================================================
87 *
88 * .. Parameters ..
89 COMPLEX*16 ONE, ZERO
90 PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ),
91 $ ZERO = ( 0.0D+0, 0.0D+0 ) )
92 INTEGER NBMAX, LDWORK
93 PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
94 * ..
95 * .. Local Scalars ..
96 INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
97 $ JU, K2, KM, KV, NB, NW
98 COMPLEX*16 TEMP
99 * ..
100 * .. Local Arrays ..
101 COMPLEX*16 WORK13( LDWORK, NBMAX ),
102 $ WORK31( LDWORK, NBMAX )
103 * ..
104 * .. External Functions ..
105 INTEGER ILAENV, IZAMAX
106 EXTERNAL ILAENV, IZAMAX
107 * ..
108 * .. External Subroutines ..
109 EXTERNAL XERBLA, ZCOPY, ZGBTF2, ZGEMM, ZGERU, ZLASWP,
110 $ ZSCAL, ZSWAP, ZTRSM
111 * ..
112 * .. Intrinsic Functions ..
113 INTRINSIC MAX, MIN
114 * ..
115 * .. Executable Statements ..
116 *
117 * KV is the number of superdiagonals in the factor U, allowing for
118 * fill-in
119 *
120 KV = KU + KL
121 *
122 * Test the input parameters.
123 *
124 INFO = 0
125 IF( M.LT.0 ) THEN
126 INFO = -1
127 ELSE IF( N.LT.0 ) THEN
128 INFO = -2
129 ELSE IF( KL.LT.0 ) THEN
130 INFO = -3
131 ELSE IF( KU.LT.0 ) THEN
132 INFO = -4
133 ELSE IF( LDAB.LT.KL+KV+1 ) THEN
134 INFO = -6
135 END IF
136 IF( INFO.NE.0 ) THEN
137 CALL XERBLA( 'ZGBTRF', -INFO )
138 RETURN
139 END IF
140 *
141 * Quick return if possible
142 *
143 IF( M.EQ.0 .OR. N.EQ.0 )
144 $ RETURN
145 *
146 * Determine the block size for this environment
147 *
148 NB = ILAENV( 1, 'ZGBTRF', ' ', M, N, KL, KU )
149 *
150 * The block size must not exceed the limit set by the size of the
151 * local arrays WORK13 and WORK31.
152 *
153 NB = MIN( NB, NBMAX )
154 *
155 IF( NB.LE.1 .OR. NB.GT.KL ) THEN
156 *
157 * Use unblocked code
158 *
159 CALL ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
160 ELSE
161 *
162 * Use blocked code
163 *
164 * Zero the superdiagonal elements of the work array WORK13
165 *
166 DO 20 J = 1, NB
167 DO 10 I = 1, J - 1
168 WORK13( I, J ) = ZERO
169 10 CONTINUE
170 20 CONTINUE
171 *
172 * Zero the subdiagonal elements of the work array WORK31
173 *
174 DO 40 J = 1, NB
175 DO 30 I = J + 1, NB
176 WORK31( I, J ) = ZERO
177 30 CONTINUE
178 40 CONTINUE
179 *
180 * Gaussian elimination with partial pivoting
181 *
182 * Set fill-in elements in columns KU+2 to KV to zero
183 *
184 DO 60 J = KU + 2, MIN( KV, N )
185 DO 50 I = KV - J + 2, KL
186 AB( I, J ) = ZERO
187 50 CONTINUE
188 60 CONTINUE
189 *
190 * JU is the index of the last column affected by the current
191 * stage of the factorization
192 *
193 JU = 1
194 *
195 DO 180 J = 1, MIN( M, N ), NB
196 JB = MIN( NB, MIN( M, N )-J+1 )
197 *
198 * The active part of the matrix is partitioned
199 *
200 * A11 A12 A13
201 * A21 A22 A23
202 * A31 A32 A33
203 *
204 * Here A11, A21 and A31 denote the current block of JB columns
205 * which is about to be factorized. The number of rows in the
206 * partitioning are JB, I2, I3 respectively, and the numbers
207 * of columns are JB, J2, J3. The superdiagonal elements of A13
208 * and the subdiagonal elements of A31 lie outside the band.
209 *
210 I2 = MIN( KL-JB, M-J-JB+1 )
211 I3 = MIN( JB, M-J-KL+1 )
212 *
213 * J2 and J3 are computed after JU has been updated.
214 *
215 * Factorize the current block of JB columns
216 *
217 DO 80 JJ = J, J + JB - 1
218 *
219 * Set fill-in elements in column JJ+KV to zero
220 *
221 IF( JJ+KV.LE.N ) THEN
222 DO 70 I = 1, KL
223 AB( I, JJ+KV ) = ZERO
224 70 CONTINUE
225 END IF
226 *
227 * Find pivot and test for singularity. KM is the number of
228 * subdiagonal elements in the current column.
229 *
230 KM = MIN( KL, M-JJ )
231 JP = IZAMAX( KM+1, AB( KV+1, JJ ), 1 )
232 IPIV( JJ ) = JP + JJ - J
233 IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
234 JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
235 IF( JP.NE.1 ) THEN
236 *
237 * Apply interchange to columns J to J+JB-1
238 *
239 IF( JP+JJ-1.LT.J+KL ) THEN
240 *
241 CALL ZSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
242 $ AB( KV+JP+JJ-J, J ), LDAB-1 )
243 ELSE
244 *
245 * The interchange affects columns J to JJ-1 of A31
246 * which are stored in the work array WORK31
247 *
248 CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
249 $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
250 CALL ZSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
251 $ AB( KV+JP, JJ ), LDAB-1 )
252 END IF
253 END IF
254 *
255 * Compute multipliers
256 *
257 CALL ZSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
258 $ 1 )
259 *
260 * Update trailing submatrix within the band and within
261 * the current block. JM is the index of the last column
262 * which needs to be updated.
263 *
264 JM = MIN( JU, J+JB-1 )
265 IF( JM.GT.JJ )
266 $ CALL ZGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
267 $ AB( KV, JJ+1 ), LDAB-1,
268 $ AB( KV+1, JJ+1 ), LDAB-1 )
269 ELSE
270 *
271 * If pivot is zero, set INFO to the index of the pivot
272 * unless a zero pivot has already been found.
273 *
274 IF( INFO.EQ.0 )
275 $ INFO = JJ
276 END IF
277 *
278 * Copy current column of A31 into the work array WORK31
279 *
280 NW = MIN( JJ-J+1, I3 )
281 IF( NW.GT.0 )
282 $ CALL ZCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
283 $ WORK31( 1, JJ-J+1 ), 1 )
284 80 CONTINUE
285 IF( J+JB.LE.N ) THEN
286 *
287 * Apply the row interchanges to the other blocks.
288 *
289 J2 = MIN( JU-J+1, KV ) - JB
290 J3 = MAX( 0, JU-J-KV+1 )
291 *
292 * Use ZLASWP to apply the row interchanges to A12, A22, and
293 * A32.
294 *
295 CALL ZLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
296 $ IPIV( J ), 1 )
297 *
298 * Adjust the pivot indices.
299 *
300 DO 90 I = J, J + JB - 1
301 IPIV( I ) = IPIV( I ) + J - 1
302 90 CONTINUE
303 *
304 * Apply the row interchanges to A13, A23, and A33
305 * columnwise.
306 *
307 K2 = J - 1 + JB + J2
308 DO 110 I = 1, J3
309 JJ = K2 + I
310 DO 100 II = J + I - 1, J + JB - 1
311 IP = IPIV( II )
312 IF( IP.NE.II ) THEN
313 TEMP = AB( KV+1+II-JJ, JJ )
314 AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
315 AB( KV+1+IP-JJ, JJ ) = TEMP
316 END IF
317 100 CONTINUE
318 110 CONTINUE
319 *
320 * Update the relevant part of the trailing submatrix
321 *
322 IF( J2.GT.0 ) THEN
323 *
324 * Update A12
325 *
326 CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
327 $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
328 $ AB( KV+1-JB, J+JB ), LDAB-1 )
329 *
330 IF( I2.GT.0 ) THEN
331 *
332 * Update A22
333 *
334 CALL ZGEMM( 'No transpose', 'No transpose', I2, J2,
335 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
336 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
337 $ AB( KV+1, J+JB ), LDAB-1 )
338 END IF
339 *
340 IF( I3.GT.0 ) THEN
341 *
342 * Update A32
343 *
344 CALL ZGEMM( 'No transpose', 'No transpose', I3, J2,
345 $ JB, -ONE, WORK31, LDWORK,
346 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
347 $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
348 END IF
349 END IF
350 *
351 IF( J3.GT.0 ) THEN
352 *
353 * Copy the lower triangle of A13 into the work array
354 * WORK13
355 *
356 DO 130 JJ = 1, J3
357 DO 120 II = JJ, JB
358 WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
359 120 CONTINUE
360 130 CONTINUE
361 *
362 * Update A13 in the work array
363 *
364 CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
365 $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
366 $ WORK13, LDWORK )
367 *
368 IF( I2.GT.0 ) THEN
369 *
370 * Update A23
371 *
372 CALL ZGEMM( 'No transpose', 'No transpose', I2, J3,
373 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
374 $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
375 $ LDAB-1 )
376 END IF
377 *
378 IF( I3.GT.0 ) THEN
379 *
380 * Update A33
381 *
382 CALL ZGEMM( 'No transpose', 'No transpose', I3, J3,
383 $ JB, -ONE, WORK31, LDWORK, WORK13,
384 $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
385 END IF
386 *
387 * Copy the lower triangle of A13 back into place
388 *
389 DO 150 JJ = 1, J3
390 DO 140 II = JJ, JB
391 AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
392 140 CONTINUE
393 150 CONTINUE
394 END IF
395 ELSE
396 *
397 * Adjust the pivot indices.
398 *
399 DO 160 I = J, J + JB - 1
400 IPIV( I ) = IPIV( I ) + J - 1
401 160 CONTINUE
402 END IF
403 *
404 * Partially undo the interchanges in the current block to
405 * restore the upper triangular form of A31 and copy the upper
406 * triangle of A31 back into place
407 *
408 DO 170 JJ = J + JB - 1, J, -1
409 JP = IPIV( JJ ) - JJ + 1
410 IF( JP.NE.1 ) THEN
411 *
412 * Apply interchange to columns J to JJ-1
413 *
414 IF( JP+JJ-1.LT.J+KL ) THEN
415 *
416 * The interchange does not affect A31
417 *
418 CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
419 $ AB( KV+JP+JJ-J, J ), LDAB-1 )
420 ELSE
421 *
422 * The interchange does affect A31
423 *
424 CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
425 $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
426 END IF
427 END IF
428 *
429 * Copy the current column of A31 back into place
430 *
431 NW = MIN( I3, JJ-J+1 )
432 IF( NW.GT.0 )
433 $ CALL ZCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
434 $ AB( KV+KL+1-JJ+J, JJ ), 1 )
435 170 CONTINUE
436 180 CONTINUE
437 END IF
438 *
439 RETURN
440 *
441 * End of ZGBTRF
442 *
443 END