comparison libcruft/lapack/sgbtrf.f @ 7789:82be108cc558

First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author David Bateman <dbateman@free.fr>
date Sun, 27 Apr 2008 22:34:17 +0200
parents
children
comparison
equal deleted inserted replaced
7788:45f5faba05a2 7789:82be108cc558
1 SUBROUTINE SGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
2 *
3 * -- LAPACK routine (version 3.1) --
4 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
5 * November 2006
6 *
7 * .. Scalar Arguments ..
8 INTEGER INFO, KL, KU, LDAB, M, N
9 * ..
10 * .. Array Arguments ..
11 INTEGER IPIV( * )
12 REAL AB( LDAB, * )
13 * ..
14 *
15 * Purpose
16 * =======
17 *
18 * SGBTRF computes an LU factorization of a real m-by-n band matrix A
19 * using partial pivoting with row interchanges.
20 *
21 * This is the blocked version of the algorithm, calling Level 3 BLAS.
22 *
23 * Arguments
24 * =========
25 *
26 * M (input) INTEGER
27 * The number of rows of the matrix A. M >= 0.
28 *
29 * N (input) INTEGER
30 * The number of columns of the matrix A. N >= 0.
31 *
32 * KL (input) INTEGER
33 * The number of subdiagonals within the band of A. KL >= 0.
34 *
35 * KU (input) INTEGER
36 * The number of superdiagonals within the band of A. KU >= 0.
37 *
38 * AB (input/output) REAL array, dimension (LDAB,N)
39 * On entry, the matrix A in band storage, in rows KL+1 to
40 * 2*KL+KU+1; rows 1 to KL of the array need not be set.
41 * The j-th column of A is stored in the j-th column of the
42 * array AB as follows:
43 * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
44 *
45 * On exit, details of the factorization: U is stored as an
46 * upper triangular band matrix with KL+KU superdiagonals in
47 * rows 1 to KL+KU+1, and the multipliers used during the
48 * factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
49 * See below for further details.
50 *
51 * LDAB (input) INTEGER
52 * The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
53 *
54 * IPIV (output) INTEGER array, dimension (min(M,N))
55 * The pivot indices; for 1 <= i <= min(M,N), row i of the
56 * matrix was interchanged with row IPIV(i).
57 *
58 * INFO (output) INTEGER
59 * = 0: successful exit
60 * < 0: if INFO = -i, the i-th argument had an illegal value
61 * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
62 * has been completed, but the factor U is exactly
63 * singular, and division by zero will occur if it is used
64 * to solve a system of equations.
65 *
66 * Further Details
67 * ===============
68 *
69 * The band storage scheme is illustrated by the following example, when
70 * M = N = 6, KL = 2, KU = 1:
71 *
72 * On entry: On exit:
73 *
74 * * * * + + + * * * u14 u25 u36
75 * * * + + + + * * u13 u24 u35 u46
76 * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
77 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
78 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
79 * a31 a42 a53 a64 * * m31 m42 m53 m64 * *
80 *
81 * Array elements marked * are not used by the routine; elements marked
82 * + need not be set on entry, but are required by the routine to store
83 * elements of U because of fill-in resulting from the row interchanges.
84 *
85 * =====================================================================
86 *
87 * .. Parameters ..
88 REAL ONE, ZERO
89 PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 )
90 INTEGER NBMAX, LDWORK
91 PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
92 * ..
93 * .. Local Scalars ..
94 INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
95 $ JU, K2, KM, KV, NB, NW
96 REAL TEMP
97 * ..
98 * .. Local Arrays ..
99 REAL WORK13( LDWORK, NBMAX ),
100 $ WORK31( LDWORK, NBMAX )
101 * ..
102 * .. External Functions ..
103 INTEGER ILAENV, ISAMAX
104 EXTERNAL ILAENV, ISAMAX
105 * ..
106 * .. External Subroutines ..
107 EXTERNAL SCOPY, SGBTF2, SGEMM, SGER, SLASWP, SSCAL,
108 $ SSWAP, STRSM, XERBLA
109 * ..
110 * .. Intrinsic Functions ..
111 INTRINSIC MAX, MIN
112 * ..
113 * .. Executable Statements ..
114 *
115 * KV is the number of superdiagonals in the factor U, allowing for
116 * fill-in
117 *
118 KV = KU + KL
119 *
120 * Test the input parameters.
121 *
122 INFO = 0
123 IF( M.LT.0 ) THEN
124 INFO = -1
125 ELSE IF( N.LT.0 ) THEN
126 INFO = -2
127 ELSE IF( KL.LT.0 ) THEN
128 INFO = -3
129 ELSE IF( KU.LT.0 ) THEN
130 INFO = -4
131 ELSE IF( LDAB.LT.KL+KV+1 ) THEN
132 INFO = -6
133 END IF
134 IF( INFO.NE.0 ) THEN
135 CALL XERBLA( 'SGBTRF', -INFO )
136 RETURN
137 END IF
138 *
139 * Quick return if possible
140 *
141 IF( M.EQ.0 .OR. N.EQ.0 )
142 $ RETURN
143 *
144 * Determine the block size for this environment
145 *
146 NB = ILAENV( 1, 'SGBTRF', ' ', M, N, KL, KU )
147 *
148 * The block size must not exceed the limit set by the size of the
149 * local arrays WORK13 and WORK31.
150 *
151 NB = MIN( NB, NBMAX )
152 *
153 IF( NB.LE.1 .OR. NB.GT.KL ) THEN
154 *
155 * Use unblocked code
156 *
157 CALL SGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
158 ELSE
159 *
160 * Use blocked code
161 *
162 * Zero the superdiagonal elements of the work array WORK13
163 *
164 DO 20 J = 1, NB
165 DO 10 I = 1, J - 1
166 WORK13( I, J ) = ZERO
167 10 CONTINUE
168 20 CONTINUE
169 *
170 * Zero the subdiagonal elements of the work array WORK31
171 *
172 DO 40 J = 1, NB
173 DO 30 I = J + 1, NB
174 WORK31( I, J ) = ZERO
175 30 CONTINUE
176 40 CONTINUE
177 *
178 * Gaussian elimination with partial pivoting
179 *
180 * Set fill-in elements in columns KU+2 to KV to zero
181 *
182 DO 60 J = KU + 2, MIN( KV, N )
183 DO 50 I = KV - J + 2, KL
184 AB( I, J ) = ZERO
185 50 CONTINUE
186 60 CONTINUE
187 *
188 * JU is the index of the last column affected by the current
189 * stage of the factorization
190 *
191 JU = 1
192 *
193 DO 180 J = 1, MIN( M, N ), NB
194 JB = MIN( NB, MIN( M, N )-J+1 )
195 *
196 * The active part of the matrix is partitioned
197 *
198 * A11 A12 A13
199 * A21 A22 A23
200 * A31 A32 A33
201 *
202 * Here A11, A21 and A31 denote the current block of JB columns
203 * which is about to be factorized. The number of rows in the
204 * partitioning are JB, I2, I3 respectively, and the numbers
205 * of columns are JB, J2, J3. The superdiagonal elements of A13
206 * and the subdiagonal elements of A31 lie outside the band.
207 *
208 I2 = MIN( KL-JB, M-J-JB+1 )
209 I3 = MIN( JB, M-J-KL+1 )
210 *
211 * J2 and J3 are computed after JU has been updated.
212 *
213 * Factorize the current block of JB columns
214 *
215 DO 80 JJ = J, J + JB - 1
216 *
217 * Set fill-in elements in column JJ+KV to zero
218 *
219 IF( JJ+KV.LE.N ) THEN
220 DO 70 I = 1, KL
221 AB( I, JJ+KV ) = ZERO
222 70 CONTINUE
223 END IF
224 *
225 * Find pivot and test for singularity. KM is the number of
226 * subdiagonal elements in the current column.
227 *
228 KM = MIN( KL, M-JJ )
229 JP = ISAMAX( KM+1, AB( KV+1, JJ ), 1 )
230 IPIV( JJ ) = JP + JJ - J
231 IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
232 JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
233 IF( JP.NE.1 ) THEN
234 *
235 * Apply interchange to columns J to J+JB-1
236 *
237 IF( JP+JJ-1.LT.J+KL ) THEN
238 *
239 CALL SSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
240 $ AB( KV+JP+JJ-J, J ), LDAB-1 )
241 ELSE
242 *
243 * The interchange affects columns J to JJ-1 of A31
244 * which are stored in the work array WORK31
245 *
246 CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
247 $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
248 CALL SSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
249 $ AB( KV+JP, JJ ), LDAB-1 )
250 END IF
251 END IF
252 *
253 * Compute multipliers
254 *
255 CALL SSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
256 $ 1 )
257 *
258 * Update trailing submatrix within the band and within
259 * the current block. JM is the index of the last column
260 * which needs to be updated.
261 *
262 JM = MIN( JU, J+JB-1 )
263 IF( JM.GT.JJ )
264 $ CALL SGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
265 $ AB( KV, JJ+1 ), LDAB-1,
266 $ AB( KV+1, JJ+1 ), LDAB-1 )
267 ELSE
268 *
269 * If pivot is zero, set INFO to the index of the pivot
270 * unless a zero pivot has already been found.
271 *
272 IF( INFO.EQ.0 )
273 $ INFO = JJ
274 END IF
275 *
276 * Copy current column of A31 into the work array WORK31
277 *
278 NW = MIN( JJ-J+1, I3 )
279 IF( NW.GT.0 )
280 $ CALL SCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
281 $ WORK31( 1, JJ-J+1 ), 1 )
282 80 CONTINUE
283 IF( J+JB.LE.N ) THEN
284 *
285 * Apply the row interchanges to the other blocks.
286 *
287 J2 = MIN( JU-J+1, KV ) - JB
288 J3 = MAX( 0, JU-J-KV+1 )
289 *
290 * Use SLASWP to apply the row interchanges to A12, A22, and
291 * A32.
292 *
293 CALL SLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
294 $ IPIV( J ), 1 )
295 *
296 * Adjust the pivot indices.
297 *
298 DO 90 I = J, J + JB - 1
299 IPIV( I ) = IPIV( I ) + J - 1
300 90 CONTINUE
301 *
302 * Apply the row interchanges to A13, A23, and A33
303 * columnwise.
304 *
305 K2 = J - 1 + JB + J2
306 DO 110 I = 1, J3
307 JJ = K2 + I
308 DO 100 II = J + I - 1, J + JB - 1
309 IP = IPIV( II )
310 IF( IP.NE.II ) THEN
311 TEMP = AB( KV+1+II-JJ, JJ )
312 AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
313 AB( KV+1+IP-JJ, JJ ) = TEMP
314 END IF
315 100 CONTINUE
316 110 CONTINUE
317 *
318 * Update the relevant part of the trailing submatrix
319 *
320 IF( J2.GT.0 ) THEN
321 *
322 * Update A12
323 *
324 CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
325 $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
326 $ AB( KV+1-JB, J+JB ), LDAB-1 )
327 *
328 IF( I2.GT.0 ) THEN
329 *
330 * Update A22
331 *
332 CALL SGEMM( 'No transpose', 'No transpose', I2, J2,
333 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
334 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
335 $ AB( KV+1, J+JB ), LDAB-1 )
336 END IF
337 *
338 IF( I3.GT.0 ) THEN
339 *
340 * Update A32
341 *
342 CALL SGEMM( 'No transpose', 'No transpose', I3, J2,
343 $ JB, -ONE, WORK31, LDWORK,
344 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
345 $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
346 END IF
347 END IF
348 *
349 IF( J3.GT.0 ) THEN
350 *
351 * Copy the lower triangle of A13 into the work array
352 * WORK13
353 *
354 DO 130 JJ = 1, J3
355 DO 120 II = JJ, JB
356 WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
357 120 CONTINUE
358 130 CONTINUE
359 *
360 * Update A13 in the work array
361 *
362 CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
363 $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
364 $ WORK13, LDWORK )
365 *
366 IF( I2.GT.0 ) THEN
367 *
368 * Update A23
369 *
370 CALL SGEMM( 'No transpose', 'No transpose', I2, J3,
371 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
372 $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
373 $ LDAB-1 )
374 END IF
375 *
376 IF( I3.GT.0 ) THEN
377 *
378 * Update A33
379 *
380 CALL SGEMM( 'No transpose', 'No transpose', I3, J3,
381 $ JB, -ONE, WORK31, LDWORK, WORK13,
382 $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
383 END IF
384 *
385 * Copy the lower triangle of A13 back into place
386 *
387 DO 150 JJ = 1, J3
388 DO 140 II = JJ, JB
389 AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
390 140 CONTINUE
391 150 CONTINUE
392 END IF
393 ELSE
394 *
395 * Adjust the pivot indices.
396 *
397 DO 160 I = J, J + JB - 1
398 IPIV( I ) = IPIV( I ) + J - 1
399 160 CONTINUE
400 END IF
401 *
402 * Partially undo the interchanges in the current block to
403 * restore the upper triangular form of A31 and copy the upper
404 * triangle of A31 back into place
405 *
406 DO 170 JJ = J + JB - 1, J, -1
407 JP = IPIV( JJ ) - JJ + 1
408 IF( JP.NE.1 ) THEN
409 *
410 * Apply interchange to columns J to JJ-1
411 *
412 IF( JP+JJ-1.LT.J+KL ) THEN
413 *
414 * The interchange does not affect A31
415 *
416 CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
417 $ AB( KV+JP+JJ-J, J ), LDAB-1 )
418 ELSE
419 *
420 * The interchange does affect A31
421 *
422 CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
423 $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
424 END IF
425 END IF
426 *
427 * Copy the current column of A31 back into place
428 *
429 NW = MIN( I3, JJ-J+1 )
430 IF( NW.GT.0 )
431 $ CALL SCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
432 $ AB( KV+KL+1-JJ+J, JJ ), 1 )
433 170 CONTINUE
434 180 CONTINUE
435 END IF
436 *
437 RETURN
438 *
439 * End of SGBTRF
440 *
441 END