comparison libcruft/lapack/sgehrd.f @ 7789:82be108cc558

First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author David Bateman <dbateman@free.fr>
date Sun, 27 Apr 2008 22:34:17 +0200
parents
children
comparison
equal deleted inserted replaced
7788:45f5faba05a2 7789:82be108cc558
1 SUBROUTINE SGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
2 *
3 * -- LAPACK routine (version 3.1) --
4 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
5 * November 2006
6 *
7 * .. Scalar Arguments ..
8 INTEGER IHI, ILO, INFO, LDA, LWORK, N
9 * ..
10 * .. Array Arguments ..
11 REAL A( LDA, * ), TAU( * ), WORK( * )
12 * ..
13 *
14 * Purpose
15 * =======
16 *
17 * SGEHRD reduces a real general matrix A to upper Hessenberg form H by
18 * an orthogonal similarity transformation: Q' * A * Q = H .
19 *
20 * Arguments
21 * =========
22 *
23 * N (input) INTEGER
24 * The order of the matrix A. N >= 0.
25 *
26 * ILO (input) INTEGER
27 * IHI (input) INTEGER
28 * It is assumed that A is already upper triangular in rows
29 * and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
30 * set by a previous call to SGEBAL; otherwise they should be
31 * set to 1 and N respectively. See Further Details.
32 * 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
33 *
34 * A (input/output) REAL array, dimension (LDA,N)
35 * On entry, the N-by-N general matrix to be reduced.
36 * On exit, the upper triangle and the first subdiagonal of A
37 * are overwritten with the upper Hessenberg matrix H, and the
38 * elements below the first subdiagonal, with the array TAU,
39 * represent the orthogonal matrix Q as a product of elementary
40 * reflectors. See Further Details.
41 *
42 * LDA (input) INTEGER
43 * The leading dimension of the array A. LDA >= max(1,N).
44 *
45 * TAU (output) REAL array, dimension (N-1)
46 * The scalar factors of the elementary reflectors (see Further
47 * Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
48 * zero.
49 *
50 * WORK (workspace/output) REAL array, dimension (LWORK)
51 * On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
52 *
53 * LWORK (input) INTEGER
54 * The length of the array WORK. LWORK >= max(1,N).
55 * For optimum performance LWORK >= N*NB, where NB is the
56 * optimal blocksize.
57 *
58 * If LWORK = -1, then a workspace query is assumed; the routine
59 * only calculates the optimal size of the WORK array, returns
60 * this value as the first entry of the WORK array, and no error
61 * message related to LWORK is issued by XERBLA.
62 *
63 * INFO (output) INTEGER
64 * = 0: successful exit
65 * < 0: if INFO = -i, the i-th argument had an illegal value.
66 *
67 * Further Details
68 * ===============
69 *
70 * The matrix Q is represented as a product of (ihi-ilo) elementary
71 * reflectors
72 *
73 * Q = H(ilo) H(ilo+1) . . . H(ihi-1).
74 *
75 * Each H(i) has the form
76 *
77 * H(i) = I - tau * v * v'
78 *
79 * where tau is a real scalar, and v is a real vector with
80 * v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
81 * exit in A(i+2:ihi,i), and tau in TAU(i).
82 *
83 * The contents of A are illustrated by the following example, with
84 * n = 7, ilo = 2 and ihi = 6:
85 *
86 * on entry, on exit,
87 *
88 * ( a a a a a a a ) ( a a h h h h a )
89 * ( a a a a a a ) ( a h h h h a )
90 * ( a a a a a a ) ( h h h h h h )
91 * ( a a a a a a ) ( v2 h h h h h )
92 * ( a a a a a a ) ( v2 v3 h h h h )
93 * ( a a a a a a ) ( v2 v3 v4 h h h )
94 * ( a ) ( a )
95 *
96 * where a denotes an element of the original matrix A, h denotes a
97 * modified element of the upper Hessenberg matrix H, and vi denotes an
98 * element of the vector defining H(i).
99 *
100 * This file is a slight modification of LAPACK-3.0's SGEHRD
101 * subroutine incorporating improvements proposed by Quintana-Orti and
102 * Van de Geijn (2005).
103 *
104 * =====================================================================
105 *
106 * .. Parameters ..
107 INTEGER NBMAX, LDT
108 PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
109 REAL ZERO, ONE
110 PARAMETER ( ZERO = 0.0E+0,
111 $ ONE = 1.0E+0 )
112 * ..
113 * .. Local Scalars ..
114 LOGICAL LQUERY
115 INTEGER I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB,
116 $ NBMIN, NH, NX
117 REAL EI
118 * ..
119 * .. Local Arrays ..
120 REAL T( LDT, NBMAX )
121 * ..
122 * .. External Subroutines ..
123 EXTERNAL SAXPY, SGEHD2, SGEMM, SLAHR2, SLARFB, STRMM,
124 $ XERBLA
125 * ..
126 * .. Intrinsic Functions ..
127 INTRINSIC MAX, MIN
128 * ..
129 * .. External Functions ..
130 INTEGER ILAENV
131 EXTERNAL ILAENV
132 * ..
133 * .. Executable Statements ..
134 *
135 * Test the input parameters
136 *
137 INFO = 0
138 NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) )
139 LWKOPT = N*NB
140 WORK( 1 ) = LWKOPT
141 LQUERY = ( LWORK.EQ.-1 )
142 IF( N.LT.0 ) THEN
143 INFO = -1
144 ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
145 INFO = -2
146 ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
147 INFO = -3
148 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
149 INFO = -5
150 ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
151 INFO = -8
152 END IF
153 IF( INFO.NE.0 ) THEN
154 CALL XERBLA( 'SGEHRD', -INFO )
155 RETURN
156 ELSE IF( LQUERY ) THEN
157 RETURN
158 END IF
159 *
160 * Set elements 1:ILO-1 and IHI:N-1 of TAU to zero
161 *
162 DO 10 I = 1, ILO - 1
163 TAU( I ) = ZERO
164 10 CONTINUE
165 DO 20 I = MAX( 1, IHI ), N - 1
166 TAU( I ) = ZERO
167 20 CONTINUE
168 *
169 * Quick return if possible
170 *
171 NH = IHI - ILO + 1
172 IF( NH.LE.1 ) THEN
173 WORK( 1 ) = 1
174 RETURN
175 END IF
176 *
177 * Determine the block size
178 *
179 NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) )
180 NBMIN = 2
181 IWS = 1
182 IF( NB.GT.1 .AND. NB.LT.NH ) THEN
183 *
184 * Determine when to cross over from blocked to unblocked code
185 * (last block is always handled by unblocked code)
186 *
187 NX = MAX( NB, ILAENV( 3, 'SGEHRD', ' ', N, ILO, IHI, -1 ) )
188 IF( NX.LT.NH ) THEN
189 *
190 * Determine if workspace is large enough for blocked code
191 *
192 IWS = N*NB
193 IF( LWORK.LT.IWS ) THEN
194 *
195 * Not enough workspace to use optimal NB: determine the
196 * minimum value of NB, and reduce NB or force use of
197 * unblocked code
198 *
199 NBMIN = MAX( 2, ILAENV( 2, 'SGEHRD', ' ', N, ILO, IHI,
200 $ -1 ) )
201 IF( LWORK.GE.N*NBMIN ) THEN
202 NB = LWORK / N
203 ELSE
204 NB = 1
205 END IF
206 END IF
207 END IF
208 END IF
209 LDWORK = N
210 *
211 IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN
212 *
213 * Use unblocked code below
214 *
215 I = ILO
216 *
217 ELSE
218 *
219 * Use blocked code
220 *
221 DO 40 I = ILO, IHI - 1 - NX, NB
222 IB = MIN( NB, IHI-I )
223 *
224 * Reduce columns i:i+ib-1 to Hessenberg form, returning the
225 * matrices V and T of the block reflector H = I - V*T*V'
226 * which performs the reduction, and also the matrix Y = A*V*T
227 *
228 CALL SLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT,
229 $ WORK, LDWORK )
230 *
231 * Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
232 * right, computing A := A - Y * V'. V(i+ib,ib-1) must be set
233 * to 1
234 *
235 EI = A( I+IB, I+IB-1 )
236 A( I+IB, I+IB-1 ) = ONE
237 CALL SGEMM( 'No transpose', 'Transpose',
238 $ IHI, IHI-I-IB+1,
239 $ IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE,
240 $ A( 1, I+IB ), LDA )
241 A( I+IB, I+IB-1 ) = EI
242 *
243 * Apply the block reflector H to A(1:i,i+1:i+ib-1) from the
244 * right
245 *
246 CALL STRMM( 'Right', 'Lower', 'Transpose',
247 $ 'Unit', I, IB-1,
248 $ ONE, A( I+1, I ), LDA, WORK, LDWORK )
249 DO 30 J = 0, IB-2
250 CALL SAXPY( I, -ONE, WORK( LDWORK*J+1 ), 1,
251 $ A( 1, I+J+1 ), 1 )
252 30 CONTINUE
253 *
254 * Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
255 * left
256 *
257 CALL SLARFB( 'Left', 'Transpose', 'Forward',
258 $ 'Columnwise',
259 $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT,
260 $ A( I+1, I+IB ), LDA, WORK, LDWORK )
261 40 CONTINUE
262 END IF
263 *
264 * Use unblocked code to reduce the rest of the matrix
265 *
266 CALL SGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO )
267 WORK( 1 ) = IWS
268 *
269 RETURN
270 *
271 * End of SGEHRD
272 *
273 END