4329
|
1 SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) |
|
2 * |
|
3 * -- LAPACK routine (version 3.0) -- |
|
4 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., |
|
5 * Courant Institute, Argonne National Lab, and Rice University |
|
6 * February 29, 1992 |
|
7 * |
|
8 * .. Scalar Arguments .. |
|
9 INTEGER INFO, KL, KU, LDAB, M, N |
|
10 * .. |
|
11 * .. Array Arguments .. |
|
12 INTEGER IPIV( * ) |
|
13 DOUBLE PRECISION AB( LDAB, * ) |
|
14 * .. |
|
15 * |
|
16 * Purpose |
|
17 * ======= |
|
18 * |
|
19 * DGBTRF computes an LU factorization of a real m-by-n band matrix A |
|
20 * using partial pivoting with row interchanges. |
|
21 * |
|
22 * This is the blocked version of the algorithm, calling Level 3 BLAS. |
|
23 * |
|
24 * Arguments |
|
25 * ========= |
|
26 * |
|
27 * M (input) INTEGER |
|
28 * The number of rows of the matrix A. M >= 0. |
|
29 * |
|
30 * N (input) INTEGER |
|
31 * The number of columns of the matrix A. N >= 0. |
|
32 * |
|
33 * KL (input) INTEGER |
|
34 * The number of subdiagonals within the band of A. KL >= 0. |
|
35 * |
|
36 * KU (input) INTEGER |
|
37 * The number of superdiagonals within the band of A. KU >= 0. |
|
38 * |
|
39 * AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) |
|
40 * On entry, the matrix A in band storage, in rows KL+1 to |
|
41 * 2*KL+KU+1; rows 1 to KL of the array need not be set. |
|
42 * The j-th column of A is stored in the j-th column of the |
|
43 * array AB as follows: |
|
44 * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) |
|
45 * |
|
46 * On exit, details of the factorization: U is stored as an |
|
47 * upper triangular band matrix with KL+KU superdiagonals in |
|
48 * rows 1 to KL+KU+1, and the multipliers used during the |
|
49 * factorization are stored in rows KL+KU+2 to 2*KL+KU+1. |
|
50 * See below for further details. |
|
51 * |
|
52 * LDAB (input) INTEGER |
|
53 * The leading dimension of the array AB. LDAB >= 2*KL+KU+1. |
|
54 * |
|
55 * IPIV (output) INTEGER array, dimension (min(M,N)) |
|
56 * The pivot indices; for 1 <= i <= min(M,N), row i of the |
|
57 * matrix was interchanged with row IPIV(i). |
|
58 * |
|
59 * INFO (output) INTEGER |
|
60 * = 0: successful exit |
|
61 * < 0: if INFO = -i, the i-th argument had an illegal value |
|
62 * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization |
|
63 * has been completed, but the factor U is exactly |
|
64 * singular, and division by zero will occur if it is used |
|
65 * to solve a system of equations. |
|
66 * |
|
67 * Further Details |
|
68 * =============== |
|
69 * |
|
70 * The band storage scheme is illustrated by the following example, when |
|
71 * M = N = 6, KL = 2, KU = 1: |
|
72 * |
|
73 * On entry: On exit: |
|
74 * |
|
75 * * * * + + + * * * u14 u25 u36 |
|
76 * * * + + + + * * u13 u24 u35 u46 |
|
77 * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 |
|
78 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 |
|
79 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * |
|
80 * a31 a42 a53 a64 * * m31 m42 m53 m64 * * |
|
81 * |
|
82 * Array elements marked * are not used by the routine; elements marked |
|
83 * + need not be set on entry, but are required by the routine to store |
|
84 * elements of U because of fill-in resulting from the row interchanges. |
|
85 * |
|
86 * ===================================================================== |
|
87 * |
|
88 * .. Parameters .. |
|
89 DOUBLE PRECISION ONE, ZERO |
|
90 PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) |
|
91 INTEGER NBMAX, LDWORK |
|
92 PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) |
|
93 * .. |
|
94 * .. Local Scalars .. |
|
95 INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP, |
|
96 $ JU, K2, KM, KV, NB, NW |
|
97 DOUBLE PRECISION TEMP |
|
98 * .. |
|
99 * .. Local Arrays .. |
|
100 DOUBLE PRECISION WORK13( LDWORK, NBMAX ), |
|
101 $ WORK31( LDWORK, NBMAX ) |
|
102 * .. |
|
103 * .. External Functions .. |
|
104 INTEGER IDAMAX, ILAENV |
|
105 EXTERNAL IDAMAX, ILAENV |
|
106 * .. |
|
107 * .. External Subroutines .. |
|
108 EXTERNAL DCOPY, DGBTF2, DGEMM, DGER, DLASWP, DSCAL, |
|
109 $ DSWAP, DTRSM, XERBLA |
|
110 * .. |
|
111 * .. Intrinsic Functions .. |
|
112 INTRINSIC MAX, MIN |
|
113 * .. |
|
114 * .. Executable Statements .. |
|
115 * |
|
116 * KV is the number of superdiagonals in the factor U, allowing for |
|
117 * fill-in |
|
118 * |
|
119 KV = KU + KL |
|
120 * |
|
121 * Test the input parameters. |
|
122 * |
|
123 INFO = 0 |
|
124 IF( M.LT.0 ) THEN |
|
125 INFO = -1 |
|
126 ELSE IF( N.LT.0 ) THEN |
|
127 INFO = -2 |
|
128 ELSE IF( KL.LT.0 ) THEN |
|
129 INFO = -3 |
|
130 ELSE IF( KU.LT.0 ) THEN |
|
131 INFO = -4 |
|
132 ELSE IF( LDAB.LT.KL+KV+1 ) THEN |
|
133 INFO = -6 |
|
134 END IF |
|
135 IF( INFO.NE.0 ) THEN |
|
136 CALL XERBLA( 'DGBTRF', -INFO ) |
|
137 RETURN |
|
138 END IF |
|
139 * |
|
140 * Quick return if possible |
|
141 * |
|
142 IF( M.EQ.0 .OR. N.EQ.0 ) |
|
143 $ RETURN |
|
144 * |
|
145 * Determine the block size for this environment |
|
146 * |
|
147 NB = ILAENV( 1, 'DGBTRF', ' ', M, N, KL, KU ) |
|
148 * |
|
149 * The block size must not exceed the limit set by the size of the |
|
150 * local arrays WORK13 and WORK31. |
|
151 * |
|
152 NB = MIN( NB, NBMAX ) |
|
153 * |
|
154 IF( NB.LE.1 .OR. NB.GT.KL ) THEN |
|
155 * |
|
156 * Use unblocked code |
|
157 * |
|
158 CALL DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) |
|
159 ELSE |
|
160 * |
|
161 * Use blocked code |
|
162 * |
|
163 * Zero the superdiagonal elements of the work array WORK13 |
|
164 * |
|
165 DO 20 J = 1, NB |
|
166 DO 10 I = 1, J - 1 |
|
167 WORK13( I, J ) = ZERO |
|
168 10 CONTINUE |
|
169 20 CONTINUE |
|
170 * |
|
171 * Zero the subdiagonal elements of the work array WORK31 |
|
172 * |
|
173 DO 40 J = 1, NB |
|
174 DO 30 I = J + 1, NB |
|
175 WORK31( I, J ) = ZERO |
|
176 30 CONTINUE |
|
177 40 CONTINUE |
|
178 * |
|
179 * Gaussian elimination with partial pivoting |
|
180 * |
|
181 * Set fill-in elements in columns KU+2 to KV to zero |
|
182 * |
|
183 DO 60 J = KU + 2, MIN( KV, N ) |
|
184 DO 50 I = KV - J + 2, KL |
|
185 AB( I, J ) = ZERO |
|
186 50 CONTINUE |
|
187 60 CONTINUE |
|
188 * |
|
189 * JU is the index of the last column affected by the current |
|
190 * stage of the factorization |
|
191 * |
|
192 JU = 1 |
|
193 * |
|
194 DO 180 J = 1, MIN( M, N ), NB |
|
195 JB = MIN( NB, MIN( M, N )-J+1 ) |
|
196 * |
|
197 * The active part of the matrix is partitioned |
|
198 * |
|
199 * A11 A12 A13 |
|
200 * A21 A22 A23 |
|
201 * A31 A32 A33 |
|
202 * |
|
203 * Here A11, A21 and A31 denote the current block of JB columns |
|
204 * which is about to be factorized. The number of rows in the |
|
205 * partitioning are JB, I2, I3 respectively, and the numbers |
|
206 * of columns are JB, J2, J3. The superdiagonal elements of A13 |
|
207 * and the subdiagonal elements of A31 lie outside the band. |
|
208 * |
|
209 I2 = MIN( KL-JB, M-J-JB+1 ) |
|
210 I3 = MIN( JB, M-J-KL+1 ) |
|
211 * |
|
212 * J2 and J3 are computed after JU has been updated. |
|
213 * |
|
214 * Factorize the current block of JB columns |
|
215 * |
|
216 DO 80 JJ = J, J + JB - 1 |
|
217 * |
|
218 * Set fill-in elements in column JJ+KV to zero |
|
219 * |
|
220 IF( JJ+KV.LE.N ) THEN |
|
221 DO 70 I = 1, KL |
|
222 AB( I, JJ+KV ) = ZERO |
|
223 70 CONTINUE |
|
224 END IF |
|
225 * |
|
226 * Find pivot and test for singularity. KM is the number of |
|
227 * subdiagonal elements in the current column. |
|
228 * |
|
229 KM = MIN( KL, M-JJ ) |
|
230 JP = IDAMAX( KM+1, AB( KV+1, JJ ), 1 ) |
|
231 IPIV( JJ ) = JP + JJ - J |
|
232 IF( AB( KV+JP, JJ ).NE.ZERO ) THEN |
|
233 JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) |
|
234 IF( JP.NE.1 ) THEN |
|
235 * |
|
236 * Apply interchange to columns J to J+JB-1 |
|
237 * |
|
238 IF( JP+JJ-1.LT.J+KL ) THEN |
|
239 * |
|
240 CALL DSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1, |
|
241 $ AB( KV+JP+JJ-J, J ), LDAB-1 ) |
|
242 ELSE |
|
243 * |
|
244 * The interchange affects columns J to JJ-1 of A31 |
|
245 * which are stored in the work array WORK31 |
|
246 * |
|
247 CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, |
|
248 $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) |
|
249 CALL DSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1, |
|
250 $ AB( KV+JP, JJ ), LDAB-1 ) |
|
251 END IF |
|
252 END IF |
|
253 * |
|
254 * Compute multipliers |
|
255 * |
|
256 CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), |
|
257 $ 1 ) |
|
258 * |
|
259 * Update trailing submatrix within the band and within |
|
260 * the current block. JM is the index of the last column |
|
261 * which needs to be updated. |
|
262 * |
|
263 JM = MIN( JU, J+JB-1 ) |
|
264 IF( JM.GT.JJ ) |
|
265 $ CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, |
|
266 $ AB( KV, JJ+1 ), LDAB-1, |
|
267 $ AB( KV+1, JJ+1 ), LDAB-1 ) |
|
268 ELSE |
|
269 * |
|
270 * If pivot is zero, set INFO to the index of the pivot |
|
271 * unless a zero pivot has already been found. |
|
272 * |
|
273 IF( INFO.EQ.0 ) |
|
274 $ INFO = JJ |
|
275 END IF |
|
276 * |
|
277 * Copy current column of A31 into the work array WORK31 |
|
278 * |
|
279 NW = MIN( JJ-J+1, I3 ) |
|
280 IF( NW.GT.0 ) |
|
281 $ CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, |
|
282 $ WORK31( 1, JJ-J+1 ), 1 ) |
|
283 80 CONTINUE |
|
284 IF( J+JB.LE.N ) THEN |
|
285 * |
|
286 * Apply the row interchanges to the other blocks. |
|
287 * |
|
288 J2 = MIN( JU-J+1, KV ) - JB |
|
289 J3 = MAX( 0, JU-J-KV+1 ) |
|
290 * |
|
291 * Use DLASWP to apply the row interchanges to A12, A22, and |
|
292 * A32. |
|
293 * |
|
294 CALL DLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB, |
|
295 $ IPIV( J ), 1 ) |
|
296 * |
|
297 * Adjust the pivot indices. |
|
298 * |
|
299 DO 90 I = J, J + JB - 1 |
|
300 IPIV( I ) = IPIV( I ) + J - 1 |
|
301 90 CONTINUE |
|
302 * |
|
303 * Apply the row interchanges to A13, A23, and A33 |
|
304 * columnwise. |
|
305 * |
|
306 K2 = J - 1 + JB + J2 |
|
307 DO 110 I = 1, J3 |
|
308 JJ = K2 + I |
|
309 DO 100 II = J + I - 1, J + JB - 1 |
|
310 IP = IPIV( II ) |
|
311 IF( IP.NE.II ) THEN |
|
312 TEMP = AB( KV+1+II-JJ, JJ ) |
|
313 AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ ) |
|
314 AB( KV+1+IP-JJ, JJ ) = TEMP |
|
315 END IF |
|
316 100 CONTINUE |
|
317 110 CONTINUE |
|
318 * |
|
319 * Update the relevant part of the trailing submatrix |
|
320 * |
|
321 IF( J2.GT.0 ) THEN |
|
322 * |
|
323 * Update A12 |
|
324 * |
|
325 CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', |
|
326 $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, |
|
327 $ AB( KV+1-JB, J+JB ), LDAB-1 ) |
|
328 * |
|
329 IF( I2.GT.0 ) THEN |
|
330 * |
|
331 * Update A22 |
|
332 * |
|
333 CALL DGEMM( 'No transpose', 'No transpose', I2, J2, |
|
334 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, |
|
335 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, |
|
336 $ AB( KV+1, J+JB ), LDAB-1 ) |
|
337 END IF |
|
338 * |
|
339 IF( I3.GT.0 ) THEN |
|
340 * |
|
341 * Update A32 |
|
342 * |
|
343 CALL DGEMM( 'No transpose', 'No transpose', I3, J2, |
|
344 $ JB, -ONE, WORK31, LDWORK, |
|
345 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, |
|
346 $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) |
|
347 END IF |
|
348 END IF |
|
349 * |
|
350 IF( J3.GT.0 ) THEN |
|
351 * |
|
352 * Copy the lower triangle of A13 into the work array |
|
353 * WORK13 |
|
354 * |
|
355 DO 130 JJ = 1, J3 |
|
356 DO 120 II = JJ, JB |
|
357 WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) |
|
358 120 CONTINUE |
|
359 130 CONTINUE |
|
360 * |
|
361 * Update A13 in the work array |
|
362 * |
|
363 CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', |
|
364 $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, |
|
365 $ WORK13, LDWORK ) |
|
366 * |
|
367 IF( I2.GT.0 ) THEN |
|
368 * |
|
369 * Update A23 |
|
370 * |
|
371 CALL DGEMM( 'No transpose', 'No transpose', I2, J3, |
|
372 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, |
|
373 $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), |
|
374 $ LDAB-1 ) |
|
375 END IF |
|
376 * |
|
377 IF( I3.GT.0 ) THEN |
|
378 * |
|
379 * Update A33 |
|
380 * |
|
381 CALL DGEMM( 'No transpose', 'No transpose', I3, J3, |
|
382 $ JB, -ONE, WORK31, LDWORK, WORK13, |
|
383 $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) |
|
384 END IF |
|
385 * |
|
386 * Copy the lower triangle of A13 back into place |
|
387 * |
|
388 DO 150 JJ = 1, J3 |
|
389 DO 140 II = JJ, JB |
|
390 AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) |
|
391 140 CONTINUE |
|
392 150 CONTINUE |
|
393 END IF |
|
394 ELSE |
|
395 * |
|
396 * Adjust the pivot indices. |
|
397 * |
|
398 DO 160 I = J, J + JB - 1 |
|
399 IPIV( I ) = IPIV( I ) + J - 1 |
|
400 160 CONTINUE |
|
401 END IF |
|
402 * |
|
403 * Partially undo the interchanges in the current block to |
|
404 * restore the upper triangular form of A31 and copy the upper |
|
405 * triangle of A31 back into place |
|
406 * |
|
407 DO 170 JJ = J + JB - 1, J, -1 |
|
408 JP = IPIV( JJ ) - JJ + 1 |
|
409 IF( JP.NE.1 ) THEN |
|
410 * |
|
411 * Apply interchange to columns J to JJ-1 |
|
412 * |
|
413 IF( JP+JJ-1.LT.J+KL ) THEN |
|
414 * |
|
415 * The interchange does not affect A31 |
|
416 * |
|
417 CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, |
|
418 $ AB( KV+JP+JJ-J, J ), LDAB-1 ) |
|
419 ELSE |
|
420 * |
|
421 * The interchange does affect A31 |
|
422 * |
|
423 CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, |
|
424 $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) |
|
425 END IF |
|
426 END IF |
|
427 * |
|
428 * Copy the current column of A31 back into place |
|
429 * |
|
430 NW = MIN( I3, JJ-J+1 ) |
|
431 IF( NW.GT.0 ) |
|
432 $ CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1, |
|
433 $ AB( KV+KL+1-JJ+J, JJ ), 1 ) |
|
434 170 CONTINUE |
|
435 180 CONTINUE |
|
436 END IF |
|
437 * |
|
438 RETURN |
|
439 * |
|
440 * End of DGBTRF |
|
441 * |
|
442 END |