comparison libcruft/lapack/clalsa.f @ 7789:82be108cc558

First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author David Bateman <dbateman@free.fr>
date Sun, 27 Apr 2008 22:34:17 +0200
parents
children
comparison
equal deleted inserted replaced
7788:45f5faba05a2 7789:82be108cc558
1 SUBROUTINE CLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
2 $ LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
3 $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK,
4 $ IWORK, INFO )
5 *
6 * -- LAPACK routine (version 3.1) --
7 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
8 * November 2006
9 *
10 * .. Scalar Arguments ..
11 INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
12 $ SMLSIZ
13 * ..
14 * .. Array Arguments ..
15 INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
16 $ K( * ), PERM( LDGCOL, * )
17 REAL C( * ), DIFL( LDU, * ), DIFR( LDU, * ),
18 $ GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ),
19 $ S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * )
20 COMPLEX B( LDB, * ), BX( LDBX, * )
21 * ..
22 *
23 * Purpose
24 * =======
25 *
26 * CLALSA is an itermediate step in solving the least squares problem
27 * by computing the SVD of the coefficient matrix in compact form (The
28 * singular vectors are computed as products of simple orthorgonal
29 * matrices.).
30 *
31 * If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector
32 * matrix of an upper bidiagonal matrix to the right hand side; and if
33 * ICOMPQ = 1, CLALSA applies the right singular vector matrix to the
34 * right hand side. The singular vector matrices were generated in
35 * compact form by CLALSA.
36 *
37 * Arguments
38 * =========
39 *
40 * ICOMPQ (input) INTEGER
41 * Specifies whether the left or the right singular vector
42 * matrix is involved.
43 * = 0: Left singular vector matrix
44 * = 1: Right singular vector matrix
45 *
46 * SMLSIZ (input) INTEGER
47 * The maximum size of the subproblems at the bottom of the
48 * computation tree.
49 *
50 * N (input) INTEGER
51 * The row and column dimensions of the upper bidiagonal matrix.
52 *
53 * NRHS (input) INTEGER
54 * The number of columns of B and BX. NRHS must be at least 1.
55 *
56 * B (input/output) COMPLEX array, dimension ( LDB, NRHS )
57 * On input, B contains the right hand sides of the least
58 * squares problem in rows 1 through M.
59 * On output, B contains the solution X in rows 1 through N.
60 *
61 * LDB (input) INTEGER
62 * The leading dimension of B in the calling subprogram.
63 * LDB must be at least max(1,MAX( M, N ) ).
64 *
65 * BX (output) COMPLEX array, dimension ( LDBX, NRHS )
66 * On exit, the result of applying the left or right singular
67 * vector matrix to B.
68 *
69 * LDBX (input) INTEGER
70 * The leading dimension of BX.
71 *
72 * U (input) REAL array, dimension ( LDU, SMLSIZ ).
73 * On entry, U contains the left singular vector matrices of all
74 * subproblems at the bottom level.
75 *
76 * LDU (input) INTEGER, LDU = > N.
77 * The leading dimension of arrays U, VT, DIFL, DIFR,
78 * POLES, GIVNUM, and Z.
79 *
80 * VT (input) REAL array, dimension ( LDU, SMLSIZ+1 ).
81 * On entry, VT' contains the right singular vector matrices of
82 * all subproblems at the bottom level.
83 *
84 * K (input) INTEGER array, dimension ( N ).
85 *
86 * DIFL (input) REAL array, dimension ( LDU, NLVL ).
87 * where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
88 *
89 * DIFR (input) REAL array, dimension ( LDU, 2 * NLVL ).
90 * On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
91 * distances between singular values on the I-th level and
92 * singular values on the (I -1)-th level, and DIFR(*, 2 * I)
93 * record the normalizing factors of the right singular vectors
94 * matrices of subproblems on I-th level.
95 *
96 * Z (input) REAL array, dimension ( LDU, NLVL ).
97 * On entry, Z(1, I) contains the components of the deflation-
98 * adjusted updating row vector for subproblems on the I-th
99 * level.
100 *
101 * POLES (input) REAL array, dimension ( LDU, 2 * NLVL ).
102 * On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
103 * singular values involved in the secular equations on the I-th
104 * level.
105 *
106 * GIVPTR (input) INTEGER array, dimension ( N ).
107 * On entry, GIVPTR( I ) records the number of Givens
108 * rotations performed on the I-th problem on the computation
109 * tree.
110 *
111 * GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
112 * On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
113 * locations of Givens rotations performed on the I-th level on
114 * the computation tree.
115 *
116 * LDGCOL (input) INTEGER, LDGCOL = > N.
117 * The leading dimension of arrays GIVCOL and PERM.
118 *
119 * PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ).
120 * On entry, PERM(*, I) records permutations done on the I-th
121 * level of the computation tree.
122 *
123 * GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ).
124 * On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
125 * values of Givens rotations performed on the I-th level on the
126 * computation tree.
127 *
128 * C (input) REAL array, dimension ( N ).
129 * On entry, if the I-th subproblem is not square,
130 * C( I ) contains the C-value of a Givens rotation related to
131 * the right null space of the I-th subproblem.
132 *
133 * S (input) REAL array, dimension ( N ).
134 * On entry, if the I-th subproblem is not square,
135 * S( I ) contains the S-value of a Givens rotation related to
136 * the right null space of the I-th subproblem.
137 *
138 * RWORK (workspace) REAL array, dimension at least
139 * max ( N, (SMLSZ+1)*NRHS*3 ).
140 *
141 * IWORK (workspace) INTEGER array.
142 * The dimension must be at least 3 * N
143 *
144 * INFO (output) INTEGER
145 * = 0: successful exit.
146 * < 0: if INFO = -i, the i-th argument had an illegal value.
147 *
148 * Further Details
149 * ===============
150 *
151 * Based on contributions by
152 * Ming Gu and Ren-Cang Li, Computer Science Division, University of
153 * California at Berkeley, USA
154 * Osni Marques, LBNL/NERSC, USA
155 *
156 * =====================================================================
157 *
158 * .. Parameters ..
159 REAL ZERO, ONE
160 PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 )
161 * ..
162 * .. Local Scalars ..
163 INTEGER I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL,
164 $ JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML,
165 $ NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE
166 * ..
167 * .. External Subroutines ..
168 EXTERNAL CCOPY, CLALS0, SGEMM, SLASDT, XERBLA
169 * ..
170 * .. Intrinsic Functions ..
171 INTRINSIC AIMAG, CMPLX, REAL
172 * ..
173 * .. Executable Statements ..
174 *
175 * Test the input parameters.
176 *
177 INFO = 0
178 *
179 IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
180 INFO = -1
181 ELSE IF( SMLSIZ.LT.3 ) THEN
182 INFO = -2
183 ELSE IF( N.LT.SMLSIZ ) THEN
184 INFO = -3
185 ELSE IF( NRHS.LT.1 ) THEN
186 INFO = -4
187 ELSE IF( LDB.LT.N ) THEN
188 INFO = -6
189 ELSE IF( LDBX.LT.N ) THEN
190 INFO = -8
191 ELSE IF( LDU.LT.N ) THEN
192 INFO = -10
193 ELSE IF( LDGCOL.LT.N ) THEN
194 INFO = -19
195 END IF
196 IF( INFO.NE.0 ) THEN
197 CALL XERBLA( 'CLALSA', -INFO )
198 RETURN
199 END IF
200 *
201 * Book-keeping and setting up the computation tree.
202 *
203 INODE = 1
204 NDIML = INODE + N
205 NDIMR = NDIML + N
206 *
207 CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
208 $ IWORK( NDIMR ), SMLSIZ )
209 *
210 * The following code applies back the left singular vector factors.
211 * For applying back the right singular vector factors, go to 170.
212 *
213 IF( ICOMPQ.EQ.1 ) THEN
214 GO TO 170
215 END IF
216 *
217 * The nodes on the bottom level of the tree were solved
218 * by SLASDQ. The corresponding left and right singular vector
219 * matrices are in explicit form. First apply back the left
220 * singular vector matrices.
221 *
222 NDB1 = ( ND+1 ) / 2
223 DO 130 I = NDB1, ND
224 *
225 * IC : center row of each node
226 * NL : number of rows of left subproblem
227 * NR : number of rows of right subproblem
228 * NLF: starting row of the left subproblem
229 * NRF: starting row of the right subproblem
230 *
231 I1 = I - 1
232 IC = IWORK( INODE+I1 )
233 NL = IWORK( NDIML+I1 )
234 NR = IWORK( NDIMR+I1 )
235 NLF = IC - NL
236 NRF = IC + 1
237 *
238 * Since B and BX are complex, the following call to SGEMM
239 * is performed in two steps (real and imaginary parts).
240 *
241 * CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
242 * $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
243 *
244 J = NL*NRHS*2
245 DO 20 JCOL = 1, NRHS
246 DO 10 JROW = NLF, NLF + NL - 1
247 J = J + 1
248 RWORK( J ) = REAL( B( JROW, JCOL ) )
249 10 CONTINUE
250 20 CONTINUE
251 CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
252 $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL )
253 J = NL*NRHS*2
254 DO 40 JCOL = 1, NRHS
255 DO 30 JROW = NLF, NLF + NL - 1
256 J = J + 1
257 RWORK( J ) = AIMAG( B( JROW, JCOL ) )
258 30 CONTINUE
259 40 CONTINUE
260 CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
261 $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ),
262 $ NL )
263 JREAL = 0
264 JIMAG = NL*NRHS
265 DO 60 JCOL = 1, NRHS
266 DO 50 JROW = NLF, NLF + NL - 1
267 JREAL = JREAL + 1
268 JIMAG = JIMAG + 1
269 BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
270 $ RWORK( JIMAG ) )
271 50 CONTINUE
272 60 CONTINUE
273 *
274 * Since B and BX are complex, the following call to SGEMM
275 * is performed in two steps (real and imaginary parts).
276 *
277 * CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
278 * $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
279 *
280 J = NR*NRHS*2
281 DO 80 JCOL = 1, NRHS
282 DO 70 JROW = NRF, NRF + NR - 1
283 J = J + 1
284 RWORK( J ) = REAL( B( JROW, JCOL ) )
285 70 CONTINUE
286 80 CONTINUE
287 CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
288 $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR )
289 J = NR*NRHS*2
290 DO 100 JCOL = 1, NRHS
291 DO 90 JROW = NRF, NRF + NR - 1
292 J = J + 1
293 RWORK( J ) = AIMAG( B( JROW, JCOL ) )
294 90 CONTINUE
295 100 CONTINUE
296 CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
297 $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ),
298 $ NR )
299 JREAL = 0
300 JIMAG = NR*NRHS
301 DO 120 JCOL = 1, NRHS
302 DO 110 JROW = NRF, NRF + NR - 1
303 JREAL = JREAL + 1
304 JIMAG = JIMAG + 1
305 BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
306 $ RWORK( JIMAG ) )
307 110 CONTINUE
308 120 CONTINUE
309 *
310 130 CONTINUE
311 *
312 * Next copy the rows of B that correspond to unchanged rows
313 * in the bidiagonal matrix to BX.
314 *
315 DO 140 I = 1, ND
316 IC = IWORK( INODE+I-1 )
317 CALL CCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )
318 140 CONTINUE
319 *
320 * Finally go through the left singular vector matrices of all
321 * the other subproblems bottom-up on the tree.
322 *
323 J = 2**NLVL
324 SQRE = 0
325 *
326 DO 160 LVL = NLVL, 1, -1
327 LVL2 = 2*LVL - 1
328 *
329 * find the first node LF and last node LL on
330 * the current level LVL
331 *
332 IF( LVL.EQ.1 ) THEN
333 LF = 1
334 LL = 1
335 ELSE
336 LF = 2**( LVL-1 )
337 LL = 2*LF - 1
338 END IF
339 DO 150 I = LF, LL
340 IM1 = I - 1
341 IC = IWORK( INODE+IM1 )
342 NL = IWORK( NDIML+IM1 )
343 NR = IWORK( NDIMR+IM1 )
344 NLF = IC - NL
345 NRF = IC + 1
346 J = J - 1
347 CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,
348 $ B( NLF, 1 ), LDB, PERM( NLF, LVL ),
349 $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
350 $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
351 $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
352 $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
353 $ INFO )
354 150 CONTINUE
355 160 CONTINUE
356 GO TO 330
357 *
358 * ICOMPQ = 1: applying back the right singular vector factors.
359 *
360 170 CONTINUE
361 *
362 * First now go through the right singular vector matrices of all
363 * the tree nodes top-down.
364 *
365 J = 0
366 DO 190 LVL = 1, NLVL
367 LVL2 = 2*LVL - 1
368 *
369 * Find the first node LF and last node LL on
370 * the current level LVL.
371 *
372 IF( LVL.EQ.1 ) THEN
373 LF = 1
374 LL = 1
375 ELSE
376 LF = 2**( LVL-1 )
377 LL = 2*LF - 1
378 END IF
379 DO 180 I = LL, LF, -1
380 IM1 = I - 1
381 IC = IWORK( INODE+IM1 )
382 NL = IWORK( NDIML+IM1 )
383 NR = IWORK( NDIMR+IM1 )
384 NLF = IC - NL
385 NRF = IC + 1
386 IF( I.EQ.LL ) THEN
387 SQRE = 0
388 ELSE
389 SQRE = 1
390 END IF
391 J = J + 1
392 CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,
393 $ BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),
394 $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
395 $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
396 $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
397 $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
398 $ INFO )
399 180 CONTINUE
400 190 CONTINUE
401 *
402 * The nodes on the bottom level of the tree were solved
403 * by SLASDQ. The corresponding right singular vector
404 * matrices are in explicit form. Apply them back.
405 *
406 NDB1 = ( ND+1 ) / 2
407 DO 320 I = NDB1, ND
408 I1 = I - 1
409 IC = IWORK( INODE+I1 )
410 NL = IWORK( NDIML+I1 )
411 NR = IWORK( NDIMR+I1 )
412 NLP1 = NL + 1
413 IF( I.EQ.ND ) THEN
414 NRP1 = NR
415 ELSE
416 NRP1 = NR + 1
417 END IF
418 NLF = IC - NL
419 NRF = IC + 1
420 *
421 * Since B and BX are complex, the following call to SGEMM is
422 * performed in two steps (real and imaginary parts).
423 *
424 * CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
425 * $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
426 *
427 J = NLP1*NRHS*2
428 DO 210 JCOL = 1, NRHS
429 DO 200 JROW = NLF, NLF + NLP1 - 1
430 J = J + 1
431 RWORK( J ) = REAL( B( JROW, JCOL ) )
432 200 CONTINUE
433 210 CONTINUE
434 CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
435 $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ),
436 $ NLP1 )
437 J = NLP1*NRHS*2
438 DO 230 JCOL = 1, NRHS
439 DO 220 JROW = NLF, NLF + NLP1 - 1
440 J = J + 1
441 RWORK( J ) = AIMAG( B( JROW, JCOL ) )
442 220 CONTINUE
443 230 CONTINUE
444 CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
445 $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO,
446 $ RWORK( 1+NLP1*NRHS ), NLP1 )
447 JREAL = 0
448 JIMAG = NLP1*NRHS
449 DO 250 JCOL = 1, NRHS
450 DO 240 JROW = NLF, NLF + NLP1 - 1
451 JREAL = JREAL + 1
452 JIMAG = JIMAG + 1
453 BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
454 $ RWORK( JIMAG ) )
455 240 CONTINUE
456 250 CONTINUE
457 *
458 * Since B and BX are complex, the following call to SGEMM is
459 * performed in two steps (real and imaginary parts).
460 *
461 * CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
462 * $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
463 *
464 J = NRP1*NRHS*2
465 DO 270 JCOL = 1, NRHS
466 DO 260 JROW = NRF, NRF + NRP1 - 1
467 J = J + 1
468 RWORK( J ) = REAL( B( JROW, JCOL ) )
469 260 CONTINUE
470 270 CONTINUE
471 CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
472 $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ),
473 $ NRP1 )
474 J = NRP1*NRHS*2
475 DO 290 JCOL = 1, NRHS
476 DO 280 JROW = NRF, NRF + NRP1 - 1
477 J = J + 1
478 RWORK( J ) = AIMAG( B( JROW, JCOL ) )
479 280 CONTINUE
480 290 CONTINUE
481 CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
482 $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO,
483 $ RWORK( 1+NRP1*NRHS ), NRP1 )
484 JREAL = 0
485 JIMAG = NRP1*NRHS
486 DO 310 JCOL = 1, NRHS
487 DO 300 JROW = NRF, NRF + NRP1 - 1
488 JREAL = JREAL + 1
489 JIMAG = JIMAG + 1
490 BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
491 $ RWORK( JIMAG ) )
492 300 CONTINUE
493 310 CONTINUE
494 *
495 320 CONTINUE
496 *
497 330 CONTINUE
498 *
499 RETURN
500 *
501 * End of CLALSA
502 *
503 END