Mercurial > octave-nkf
comparison libcruft/lapack/sgbtrf.f @ 7789:82be108cc558
First attempt at single precision tyeps
* * *
corrections to qrupdate single precision routines
* * *
prefer demotion to single over promotion to double
* * *
Add single precision support to log2 function
* * *
Trivial PROJECT file update
* * *
Cache optimized hermitian/transpose methods
* * *
Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author | David Bateman <dbateman@free.fr> |
---|---|
date | Sun, 27 Apr 2008 22:34:17 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
7788:45f5faba05a2 | 7789:82be108cc558 |
---|---|
1 SUBROUTINE SGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) | |
2 * | |
3 * -- LAPACK routine (version 3.1) -- | |
4 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. | |
5 * November 2006 | |
6 * | |
7 * .. Scalar Arguments .. | |
8 INTEGER INFO, KL, KU, LDAB, M, N | |
9 * .. | |
10 * .. Array Arguments .. | |
11 INTEGER IPIV( * ) | |
12 REAL AB( LDAB, * ) | |
13 * .. | |
14 * | |
15 * Purpose | |
16 * ======= | |
17 * | |
18 * SGBTRF computes an LU factorization of a real m-by-n band matrix A | |
19 * using partial pivoting with row interchanges. | |
20 * | |
21 * This is the blocked version of the algorithm, calling Level 3 BLAS. | |
22 * | |
23 * Arguments | |
24 * ========= | |
25 * | |
26 * M (input) INTEGER | |
27 * The number of rows of the matrix A. M >= 0. | |
28 * | |
29 * N (input) INTEGER | |
30 * The number of columns of the matrix A. N >= 0. | |
31 * | |
32 * KL (input) INTEGER | |
33 * The number of subdiagonals within the band of A. KL >= 0. | |
34 * | |
35 * KU (input) INTEGER | |
36 * The number of superdiagonals within the band of A. KU >= 0. | |
37 * | |
38 * AB (input/output) REAL array, dimension (LDAB,N) | |
39 * On entry, the matrix A in band storage, in rows KL+1 to | |
40 * 2*KL+KU+1; rows 1 to KL of the array need not be set. | |
41 * The j-th column of A is stored in the j-th column of the | |
42 * array AB as follows: | |
43 * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) | |
44 * | |
45 * On exit, details of the factorization: U is stored as an | |
46 * upper triangular band matrix with KL+KU superdiagonals in | |
47 * rows 1 to KL+KU+1, and the multipliers used during the | |
48 * factorization are stored in rows KL+KU+2 to 2*KL+KU+1. | |
49 * See below for further details. | |
50 * | |
51 * LDAB (input) INTEGER | |
52 * The leading dimension of the array AB. LDAB >= 2*KL+KU+1. | |
53 * | |
54 * IPIV (output) INTEGER array, dimension (min(M,N)) | |
55 * The pivot indices; for 1 <= i <= min(M,N), row i of the | |
56 * matrix was interchanged with row IPIV(i). | |
57 * | |
58 * INFO (output) INTEGER | |
59 * = 0: successful exit | |
60 * < 0: if INFO = -i, the i-th argument had an illegal value | |
61 * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization | |
62 * has been completed, but the factor U is exactly | |
63 * singular, and division by zero will occur if it is used | |
64 * to solve a system of equations. | |
65 * | |
66 * Further Details | |
67 * =============== | |
68 * | |
69 * The band storage scheme is illustrated by the following example, when | |
70 * M = N = 6, KL = 2, KU = 1: | |
71 * | |
72 * On entry: On exit: | |
73 * | |
74 * * * * + + + * * * u14 u25 u36 | |
75 * * * + + + + * * u13 u24 u35 u46 | |
76 * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 | |
77 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 | |
78 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * | |
79 * a31 a42 a53 a64 * * m31 m42 m53 m64 * * | |
80 * | |
81 * Array elements marked * are not used by the routine; elements marked | |
82 * + need not be set on entry, but are required by the routine to store | |
83 * elements of U because of fill-in resulting from the row interchanges. | |
84 * | |
85 * ===================================================================== | |
86 * | |
87 * .. Parameters .. | |
88 REAL ONE, ZERO | |
89 PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) | |
90 INTEGER NBMAX, LDWORK | |
91 PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) | |
92 * .. | |
93 * .. Local Scalars .. | |
94 INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP, | |
95 $ JU, K2, KM, KV, NB, NW | |
96 REAL TEMP | |
97 * .. | |
98 * .. Local Arrays .. | |
99 REAL WORK13( LDWORK, NBMAX ), | |
100 $ WORK31( LDWORK, NBMAX ) | |
101 * .. | |
102 * .. External Functions .. | |
103 INTEGER ILAENV, ISAMAX | |
104 EXTERNAL ILAENV, ISAMAX | |
105 * .. | |
106 * .. External Subroutines .. | |
107 EXTERNAL SCOPY, SGBTF2, SGEMM, SGER, SLASWP, SSCAL, | |
108 $ SSWAP, STRSM, XERBLA | |
109 * .. | |
110 * .. Intrinsic Functions .. | |
111 INTRINSIC MAX, MIN | |
112 * .. | |
113 * .. Executable Statements .. | |
114 * | |
115 * KV is the number of superdiagonals in the factor U, allowing for | |
116 * fill-in | |
117 * | |
118 KV = KU + KL | |
119 * | |
120 * Test the input parameters. | |
121 * | |
122 INFO = 0 | |
123 IF( M.LT.0 ) THEN | |
124 INFO = -1 | |
125 ELSE IF( N.LT.0 ) THEN | |
126 INFO = -2 | |
127 ELSE IF( KL.LT.0 ) THEN | |
128 INFO = -3 | |
129 ELSE IF( KU.LT.0 ) THEN | |
130 INFO = -4 | |
131 ELSE IF( LDAB.LT.KL+KV+1 ) THEN | |
132 INFO = -6 | |
133 END IF | |
134 IF( INFO.NE.0 ) THEN | |
135 CALL XERBLA( 'SGBTRF', -INFO ) | |
136 RETURN | |
137 END IF | |
138 * | |
139 * Quick return if possible | |
140 * | |
141 IF( M.EQ.0 .OR. N.EQ.0 ) | |
142 $ RETURN | |
143 * | |
144 * Determine the block size for this environment | |
145 * | |
146 NB = ILAENV( 1, 'SGBTRF', ' ', M, N, KL, KU ) | |
147 * | |
148 * The block size must not exceed the limit set by the size of the | |
149 * local arrays WORK13 and WORK31. | |
150 * | |
151 NB = MIN( NB, NBMAX ) | |
152 * | |
153 IF( NB.LE.1 .OR. NB.GT.KL ) THEN | |
154 * | |
155 * Use unblocked code | |
156 * | |
157 CALL SGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) | |
158 ELSE | |
159 * | |
160 * Use blocked code | |
161 * | |
162 * Zero the superdiagonal elements of the work array WORK13 | |
163 * | |
164 DO 20 J = 1, NB | |
165 DO 10 I = 1, J - 1 | |
166 WORK13( I, J ) = ZERO | |
167 10 CONTINUE | |
168 20 CONTINUE | |
169 * | |
170 * Zero the subdiagonal elements of the work array WORK31 | |
171 * | |
172 DO 40 J = 1, NB | |
173 DO 30 I = J + 1, NB | |
174 WORK31( I, J ) = ZERO | |
175 30 CONTINUE | |
176 40 CONTINUE | |
177 * | |
178 * Gaussian elimination with partial pivoting | |
179 * | |
180 * Set fill-in elements in columns KU+2 to KV to zero | |
181 * | |
182 DO 60 J = KU + 2, MIN( KV, N ) | |
183 DO 50 I = KV - J + 2, KL | |
184 AB( I, J ) = ZERO | |
185 50 CONTINUE | |
186 60 CONTINUE | |
187 * | |
188 * JU is the index of the last column affected by the current | |
189 * stage of the factorization | |
190 * | |
191 JU = 1 | |
192 * | |
193 DO 180 J = 1, MIN( M, N ), NB | |
194 JB = MIN( NB, MIN( M, N )-J+1 ) | |
195 * | |
196 * The active part of the matrix is partitioned | |
197 * | |
198 * A11 A12 A13 | |
199 * A21 A22 A23 | |
200 * A31 A32 A33 | |
201 * | |
202 * Here A11, A21 and A31 denote the current block of JB columns | |
203 * which is about to be factorized. The number of rows in the | |
204 * partitioning are JB, I2, I3 respectively, and the numbers | |
205 * of columns are JB, J2, J3. The superdiagonal elements of A13 | |
206 * and the subdiagonal elements of A31 lie outside the band. | |
207 * | |
208 I2 = MIN( KL-JB, M-J-JB+1 ) | |
209 I3 = MIN( JB, M-J-KL+1 ) | |
210 * | |
211 * J2 and J3 are computed after JU has been updated. | |
212 * | |
213 * Factorize the current block of JB columns | |
214 * | |
215 DO 80 JJ = J, J + JB - 1 | |
216 * | |
217 * Set fill-in elements in column JJ+KV to zero | |
218 * | |
219 IF( JJ+KV.LE.N ) THEN | |
220 DO 70 I = 1, KL | |
221 AB( I, JJ+KV ) = ZERO | |
222 70 CONTINUE | |
223 END IF | |
224 * | |
225 * Find pivot and test for singularity. KM is the number of | |
226 * subdiagonal elements in the current column. | |
227 * | |
228 KM = MIN( KL, M-JJ ) | |
229 JP = ISAMAX( KM+1, AB( KV+1, JJ ), 1 ) | |
230 IPIV( JJ ) = JP + JJ - J | |
231 IF( AB( KV+JP, JJ ).NE.ZERO ) THEN | |
232 JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) | |
233 IF( JP.NE.1 ) THEN | |
234 * | |
235 * Apply interchange to columns J to J+JB-1 | |
236 * | |
237 IF( JP+JJ-1.LT.J+KL ) THEN | |
238 * | |
239 CALL SSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1, | |
240 $ AB( KV+JP+JJ-J, J ), LDAB-1 ) | |
241 ELSE | |
242 * | |
243 * The interchange affects columns J to JJ-1 of A31 | |
244 * which are stored in the work array WORK31 | |
245 * | |
246 CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, | |
247 $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) | |
248 CALL SSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1, | |
249 $ AB( KV+JP, JJ ), LDAB-1 ) | |
250 END IF | |
251 END IF | |
252 * | |
253 * Compute multipliers | |
254 * | |
255 CALL SSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), | |
256 $ 1 ) | |
257 * | |
258 * Update trailing submatrix within the band and within | |
259 * the current block. JM is the index of the last column | |
260 * which needs to be updated. | |
261 * | |
262 JM = MIN( JU, J+JB-1 ) | |
263 IF( JM.GT.JJ ) | |
264 $ CALL SGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, | |
265 $ AB( KV, JJ+1 ), LDAB-1, | |
266 $ AB( KV+1, JJ+1 ), LDAB-1 ) | |
267 ELSE | |
268 * | |
269 * If pivot is zero, set INFO to the index of the pivot | |
270 * unless a zero pivot has already been found. | |
271 * | |
272 IF( INFO.EQ.0 ) | |
273 $ INFO = JJ | |
274 END IF | |
275 * | |
276 * Copy current column of A31 into the work array WORK31 | |
277 * | |
278 NW = MIN( JJ-J+1, I3 ) | |
279 IF( NW.GT.0 ) | |
280 $ CALL SCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, | |
281 $ WORK31( 1, JJ-J+1 ), 1 ) | |
282 80 CONTINUE | |
283 IF( J+JB.LE.N ) THEN | |
284 * | |
285 * Apply the row interchanges to the other blocks. | |
286 * | |
287 J2 = MIN( JU-J+1, KV ) - JB | |
288 J3 = MAX( 0, JU-J-KV+1 ) | |
289 * | |
290 * Use SLASWP to apply the row interchanges to A12, A22, and | |
291 * A32. | |
292 * | |
293 CALL SLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB, | |
294 $ IPIV( J ), 1 ) | |
295 * | |
296 * Adjust the pivot indices. | |
297 * | |
298 DO 90 I = J, J + JB - 1 | |
299 IPIV( I ) = IPIV( I ) + J - 1 | |
300 90 CONTINUE | |
301 * | |
302 * Apply the row interchanges to A13, A23, and A33 | |
303 * columnwise. | |
304 * | |
305 K2 = J - 1 + JB + J2 | |
306 DO 110 I = 1, J3 | |
307 JJ = K2 + I | |
308 DO 100 II = J + I - 1, J + JB - 1 | |
309 IP = IPIV( II ) | |
310 IF( IP.NE.II ) THEN | |
311 TEMP = AB( KV+1+II-JJ, JJ ) | |
312 AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ ) | |
313 AB( KV+1+IP-JJ, JJ ) = TEMP | |
314 END IF | |
315 100 CONTINUE | |
316 110 CONTINUE | |
317 * | |
318 * Update the relevant part of the trailing submatrix | |
319 * | |
320 IF( J2.GT.0 ) THEN | |
321 * | |
322 * Update A12 | |
323 * | |
324 CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', | |
325 $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, | |
326 $ AB( KV+1-JB, J+JB ), LDAB-1 ) | |
327 * | |
328 IF( I2.GT.0 ) THEN | |
329 * | |
330 * Update A22 | |
331 * | |
332 CALL SGEMM( 'No transpose', 'No transpose', I2, J2, | |
333 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, | |
334 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, | |
335 $ AB( KV+1, J+JB ), LDAB-1 ) | |
336 END IF | |
337 * | |
338 IF( I3.GT.0 ) THEN | |
339 * | |
340 * Update A32 | |
341 * | |
342 CALL SGEMM( 'No transpose', 'No transpose', I3, J2, | |
343 $ JB, -ONE, WORK31, LDWORK, | |
344 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, | |
345 $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) | |
346 END IF | |
347 END IF | |
348 * | |
349 IF( J3.GT.0 ) THEN | |
350 * | |
351 * Copy the lower triangle of A13 into the work array | |
352 * WORK13 | |
353 * | |
354 DO 130 JJ = 1, J3 | |
355 DO 120 II = JJ, JB | |
356 WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) | |
357 120 CONTINUE | |
358 130 CONTINUE | |
359 * | |
360 * Update A13 in the work array | |
361 * | |
362 CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', | |
363 $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, | |
364 $ WORK13, LDWORK ) | |
365 * | |
366 IF( I2.GT.0 ) THEN | |
367 * | |
368 * Update A23 | |
369 * | |
370 CALL SGEMM( 'No transpose', 'No transpose', I2, J3, | |
371 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, | |
372 $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), | |
373 $ LDAB-1 ) | |
374 END IF | |
375 * | |
376 IF( I3.GT.0 ) THEN | |
377 * | |
378 * Update A33 | |
379 * | |
380 CALL SGEMM( 'No transpose', 'No transpose', I3, J3, | |
381 $ JB, -ONE, WORK31, LDWORK, WORK13, | |
382 $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) | |
383 END IF | |
384 * | |
385 * Copy the lower triangle of A13 back into place | |
386 * | |
387 DO 150 JJ = 1, J3 | |
388 DO 140 II = JJ, JB | |
389 AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) | |
390 140 CONTINUE | |
391 150 CONTINUE | |
392 END IF | |
393 ELSE | |
394 * | |
395 * Adjust the pivot indices. | |
396 * | |
397 DO 160 I = J, J + JB - 1 | |
398 IPIV( I ) = IPIV( I ) + J - 1 | |
399 160 CONTINUE | |
400 END IF | |
401 * | |
402 * Partially undo the interchanges in the current block to | |
403 * restore the upper triangular form of A31 and copy the upper | |
404 * triangle of A31 back into place | |
405 * | |
406 DO 170 JJ = J + JB - 1, J, -1 | |
407 JP = IPIV( JJ ) - JJ + 1 | |
408 IF( JP.NE.1 ) THEN | |
409 * | |
410 * Apply interchange to columns J to JJ-1 | |
411 * | |
412 IF( JP+JJ-1.LT.J+KL ) THEN | |
413 * | |
414 * The interchange does not affect A31 | |
415 * | |
416 CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, | |
417 $ AB( KV+JP+JJ-J, J ), LDAB-1 ) | |
418 ELSE | |
419 * | |
420 * The interchange does affect A31 | |
421 * | |
422 CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, | |
423 $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) | |
424 END IF | |
425 END IF | |
426 * | |
427 * Copy the current column of A31 back into place | |
428 * | |
429 NW = MIN( I3, JJ-J+1 ) | |
430 IF( NW.GT.0 ) | |
431 $ CALL SCOPY( NW, WORK31( 1, JJ-J+1 ), 1, | |
432 $ AB( KV+KL+1-JJ+J, JJ ), 1 ) | |
433 170 CONTINUE | |
434 180 CONTINUE | |
435 END IF | |
436 * | |
437 RETURN | |
438 * | |
439 * End of SGBTRF | |
440 * | |
441 END |