Mercurial > octave
comparison libcruft/lapack/sgehrd.f @ 7789:82be108cc558
First attempt at single precision types
* * *
corrections to qrupdate single precision routines
* * *
prefer demotion to single over promotion to double
* * *
Add single precision support to log2 function
* * *
Trivial PROJECT file update
* * *
Cache optimized hermitian/transpose methods
* * *
Add tests for transpose/hermitian and ChangeLog entry for new transpose code
author | David Bateman <dbateman@free.fr> |
---|---|
date | Sun, 27 Apr 2008 22:34:17 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
7788:45f5faba05a2 | 7789:82be108cc558 |
---|---|
*     SGEHRD: blocked reduction of a real general matrix to upper
*     Hessenberg form (single precision).
      SUBROUTINE SGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
*
*  -- LAPACK routine (version 3.1) --
*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
*     November 2006
*
*     .. Scalar Arguments ..
      INTEGER            IHI, ILO, INFO, LDA, LWORK, N
*     ..
*     .. Array Arguments ..
      REAL               A( LDA, * ), TAU( * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  SGEHRD reduces a real general matrix A to upper Hessenberg form H by
*  an orthogonal similarity transformation:  Q' * A * Q = H .
*
*  Arguments
*  =========
*
*  N       (input) INTEGER
*          The order of the matrix A.  N >= 0.
*
*  ILO     (input) INTEGER
*  IHI     (input) INTEGER
*          It is assumed that A is already upper triangular in rows
*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
*          set by a previous call to SGEBAL; otherwise they should be
*          set to 1 and N respectively. See Further Details.
*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
*
*  A       (input/output) REAL array, dimension (LDA,N)
*          On entry, the N-by-N general matrix to be reduced.
*          On exit, the upper triangle and the first subdiagonal of A
*          are overwritten with the upper Hessenberg matrix H, and the
*          elements below the first subdiagonal, with the array TAU,
*          represent the orthogonal matrix Q as a product of elementary
*          reflectors. See Further Details.
*
*  LDA     (input) INTEGER
*          The leading dimension of the array A.  LDA >= max(1,N).
*
*  TAU     (output) REAL array, dimension (N-1)
*          The scalar factors of the elementary reflectors (see Further
*          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
*          zero.
*
*  WORK    (workspace/output) REAL array, dimension (LWORK)
*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK,
*          i.e. max(1,N*NB).  The only exception is the quick return
*          taken when IHI-ILO+1 <= 1, where WORK(1) is set to 1.
*
*  LWORK   (input) INTEGER
*          The length of the array WORK.  LWORK >= max(1,N).
*          For optimum performance LWORK >= N*NB, where NB is the
*          optimal blocksize.
*
*          If LWORK = -1, then a workspace query is assumed; the routine
*          only calculates the optimal size of the WORK array, returns
*          this value as the first entry of the WORK array, and no error
*          message related to LWORK is issued by XERBLA.
*
*  INFO    (output) INTEGER
*          = 0:  successful exit
*          < 0:  if INFO = -i, the i-th argument had an illegal value.
*
*  Further Details
*  ===============
*
*  The matrix Q is represented as a product of (ihi-ilo) elementary
*  reflectors
*
*     Q = H(ilo) H(ilo+1) . . . H(ihi-1).
*
*  Each H(i) has the form
*
*     H(i) = I - tau * v * v'
*
*  where tau is a real scalar, and v is a real vector with
*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
*  exit in A(i+2:ihi,i), and tau in TAU(i).
*
*  The contents of A are illustrated by the following example, with
*  n = 7, ilo = 2 and ihi = 6:
*
*  on entry,                        on exit,
*
*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
*  (                         a )    (                          a )
*
*  where a denotes an element of the original matrix A, h denotes a
*  modified element of the upper Hessenberg matrix H, and vi denotes an
*  element of the vector defining H(i).
*
*  This file is a slight modification of LAPACK-3.0's SGEHRD
*  subroutine incorporating improvements proposed by Quintana-Orti and
*  Van de Geijn (2005).
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            NBMAX, LDT
      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
      REAL               ZERO, ONE
      PARAMETER          ( ZERO = 0.0E+0,
     $                     ONE = 1.0E+0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            LQUERY
      INTEGER            I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB,
     $                   NBMIN, NH, NX
      REAL               EI
*     ..
*     .. Local Arrays ..
      REAL               T( LDT, NBMAX )
*     ..
*     .. External Subroutines ..
      EXTERNAL           SAXPY, SGEHD2, SGEMM, SLAHR2, SLARFB, STRMM,
     $                   XERBLA
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
*     ..
*     .. External Functions ..
      INTEGER            ILAENV
      EXTERNAL           ILAENV
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters
*
      INFO = 0
      NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) )
*
*     The reported optimal workspace is clamped to at least 1 so that
*     the value returned by a workspace query always satisfies the
*     LWORK >= max(1,N) check below (N = 0 would otherwise yield 0).
*
      LWKOPT = MAX( 1, N*NB )
      WORK( 1 ) = LWKOPT
      LQUERY = ( LWORK.EQ.-1 )
      IF( N.LT.0 ) THEN
         INFO = -1
      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
         INFO = -2
      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
         INFO = -3
      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
         INFO = -5
      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
         INFO = -8
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'SGEHRD', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
*
*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero
*
      DO 10 I = 1, ILO - 1
         TAU( I ) = ZERO
   10 CONTINUE
      DO 20 I = MAX( 1, IHI ), N - 1
         TAU( I ) = ZERO
   20 CONTINUE
*
*     Quick return if possible
*
      NH = IHI - ILO + 1
      IF( NH.LE.1 ) THEN
         WORK( 1 ) = 1
         RETURN
      END IF
*
*     Determine the block size
*
      NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) )
      NBMIN = 2
      IWS = 1
      IF( NB.GT.1 .AND. NB.LT.NH ) THEN
*
*        Determine when to cross over from blocked to unblocked code
*        (last block is always handled by unblocked code)
*
         NX = MAX( NB, ILAENV( 3, 'SGEHRD', ' ', N, ILO, IHI, -1 ) )
         IF( NX.LT.NH ) THEN
*
*           Determine if workspace is large enough for blocked code
*
            IWS = N*NB
            IF( LWORK.LT.IWS ) THEN
*
*              Not enough workspace to use optimal NB:  determine the
*              minimum value of NB, and reduce NB or force use of
*              unblocked code
*
               NBMIN = MAX( 2, ILAENV( 2, 'SGEHRD', ' ', N, ILO, IHI,
     $                 -1 ) )
               IF( LWORK.GE.N*NBMIN ) THEN
                  NB = LWORK / N
               ELSE
                  NB = 1
               END IF
            END IF
         END IF
      END IF
      LDWORK = N
*
      IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN
*
*        Use unblocked code below
*
         I = ILO
*
      ELSE
*
*        Use blocked code
*
         DO 40 I = ILO, IHI - 1 - NX, NB
            IB = MIN( NB, IHI-I )
*
*           Reduce columns i:i+ib-1 to Hessenberg form, returning the
*           matrices V and T of the block reflector H = I - V*T*V'
*           which performs the reduction, and also the matrix Y = A*V*T
*
            CALL SLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT,
     $                   WORK, LDWORK )
*
*           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
*           right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set
*           to 1
*
            EI = A( I+IB, I+IB-1 )
            A( I+IB, I+IB-1 ) = ONE
            CALL SGEMM( 'No transpose', 'Transpose',
     $                  IHI, IHI-I-IB+1,
     $                  IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE,
     $                  A( 1, I+IB ), LDA )
            A( I+IB, I+IB-1 ) = EI
*
*           Apply the block reflector H to A(1:i,i+1:i+ib-1) from the
*           right
*
            CALL STRMM( 'Right', 'Lower', 'Transpose',
     $                  'Unit', I, IB-1,
     $                  ONE, A( I+1, I ), LDA, WORK, LDWORK )
            DO 30 J = 0, IB-2
               CALL SAXPY( I, -ONE, WORK( LDWORK*J+1 ), 1,
     $                     A( 1, I+J+1 ), 1 )
   30       CONTINUE
*
*           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
*           left
*
            CALL SLARFB( 'Left', 'Transpose', 'Forward',
     $                   'Columnwise',
     $                   IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT,
     $                   A( I+1, I+IB ), LDA, WORK, LDWORK )
   40    CONTINUE
      END IF
*
*     Use unblocked code to reduce the rest of the matrix.  I is ILO
*     when no blocked step was taken, otherwise the first column left
*     unreduced by the blocked loop.
*
      CALL SGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO )
*
*     Report the optimal workspace size, as documented for WORK(1).
*     (IWS stays 1 whenever the unblocked path is chosen, which is
*     smaller than the minimum legal LWORK of max(1,N).)
*
      WORK( 1 ) = LWKOPT
*
      RETURN
*
*     End of SGEHRD
*
      END