Mercurial > octave-nkf
comparison liboctave/fEIG.cc @ 7789:82be108cc558
First attempt at single precision tyeps
* * *
corrections to qrupdate single precision routines
* * *
prefer demotion to single over promotion to double
* * *
Add single precision support to log2 function
* * *
Trivial PROJECT file update
* * *
Cache optimized hermitian/transpose methods
* * *
Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author | David Bateman <dbateman@free.fr> |
---|---|
date | Sun, 27 Apr 2008 22:34:17 +0200 |
parents | |
children | 18c4ded8612a |
comparison
equal
deleted
inserted
replaced
7788:45f5faba05a2 | 7789:82be108cc558 |
---|---|
1 /* | |
2 | |
3 Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2002, 2003, 2004, | |
4 2005, 2006, 2007 John W. Eaton | |
5 | |
6 This file is part of Octave. | |
7 | |
8 Octave is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
10 Free Software Foundation; either version 3 of the License, or (at your | |
11 option) any later version. | |
12 | |
13 Octave is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
19 along with Octave; see the file COPYING. If not, see | |
20 <http://www.gnu.org/licenses/>. | |
21 | |
22 */ | |
23 | |
24 #ifdef HAVE_CONFIG_H | |
25 #include <config.h> | |
26 #endif | |
27 | |
28 #include "fEIG.h" | |
29 #include "fColVector.h" | |
30 #include "f77-fcn.h" | |
31 #include "lo-error.h" | |
32 | |
33 extern "C" | |
34 { | |
35 F77_RET_T | |
36 F77_FUNC (sgeev, SGEEV) (F77_CONST_CHAR_ARG_DECL, | |
37 F77_CONST_CHAR_ARG_DECL, | |
38 const octave_idx_type&, float*, const octave_idx_type&, float*, | |
39 float*, float*, const octave_idx_type&, float*, | |
40 const octave_idx_type&, float*, const octave_idx_type&, octave_idx_type& | |
41 F77_CHAR_ARG_LEN_DECL | |
42 F77_CHAR_ARG_LEN_DECL); | |
43 | |
44 F77_RET_T | |
45 F77_FUNC (cgeev, CGEEV) (F77_CONST_CHAR_ARG_DECL, | |
46 F77_CONST_CHAR_ARG_DECL, | |
47 const octave_idx_type&, FloatComplex*, const octave_idx_type&, FloatComplex*, | |
48 FloatComplex*, const octave_idx_type&, FloatComplex*, const octave_idx_type&, | |
49 FloatComplex*, const octave_idx_type&, float*, octave_idx_type& | |
50 F77_CHAR_ARG_LEN_DECL | |
51 F77_CHAR_ARG_LEN_DECL); | |
52 | |
53 F77_RET_T | |
54 F77_FUNC (ssyev, SSYEV) (F77_CONST_CHAR_ARG_DECL, | |
55 F77_CONST_CHAR_ARG_DECL, | |
56 const octave_idx_type&, float*, const octave_idx_type&, float*, | |
57 float*, const octave_idx_type&, octave_idx_type& | |
58 F77_CHAR_ARG_LEN_DECL | |
59 F77_CHAR_ARG_LEN_DECL); | |
60 | |
61 F77_RET_T | |
62 F77_FUNC (cheev, CHEEV) (F77_CONST_CHAR_ARG_DECL, | |
63 F77_CONST_CHAR_ARG_DECL, | |
64 const octave_idx_type&, FloatComplex*, const octave_idx_type&, float*, | |
65 FloatComplex*, const octave_idx_type&, float*, octave_idx_type& | |
66 F77_CHAR_ARG_LEN_DECL | |
67 F77_CHAR_ARG_LEN_DECL); | |
68 } | |
69 | |
70 octave_idx_type | |
71 FloatEIG::init (const FloatMatrix& a, bool calc_ev) | |
72 { | |
73 if (a.any_element_is_inf_or_nan ()) | |
74 { | |
75 (*current_liboctave_error_handler) | |
76 ("EIG: matrix contains Inf or NaN values"); | |
77 return -1; | |
78 } | |
79 | |
80 if (a.is_symmetric ()) | |
81 return symmetric_init (a, calc_ev); | |
82 | |
83 octave_idx_type n = a.rows (); | |
84 | |
85 if (n != a.cols ()) | |
86 { | |
87 (*current_liboctave_error_handler) ("EIG requires square matrix"); | |
88 return -1; | |
89 } | |
90 | |
91 octave_idx_type info = 0; | |
92 | |
93 FloatMatrix atmp = a; | |
94 float *tmp_data = atmp.fortran_vec (); | |
95 | |
96 Array<float> wr (n); | |
97 float *pwr = wr.fortran_vec (); | |
98 | |
99 Array<float> wi (n); | |
100 float *pwi = wi.fortran_vec (); | |
101 | |
102 volatile octave_idx_type nvr = calc_ev ? n : 0; | |
103 FloatMatrix vr (nvr, nvr); | |
104 float *pvr = vr.fortran_vec (); | |
105 | |
106 octave_idx_type lwork = -1; | |
107 float dummy_work; | |
108 | |
109 float *dummy = 0; | |
110 octave_idx_type idummy = 1; | |
111 | |
112 F77_XFCN (sgeev, SGEEV, (F77_CONST_CHAR_ARG2 ("N", 1), | |
113 F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1), | |
114 n, tmp_data, n, pwr, pwi, dummy, | |
115 idummy, pvr, n, &dummy_work, lwork, info | |
116 F77_CHAR_ARG_LEN (1) | |
117 F77_CHAR_ARG_LEN (1))); | |
118 | |
119 if (info == 0) | |
120 { | |
121 lwork = static_cast<octave_idx_type> (dummy_work); | |
122 Array<float> work (lwork); | |
123 float *pwork = work.fortran_vec (); | |
124 | |
125 F77_XFCN (sgeev, SGEEV, (F77_CONST_CHAR_ARG2 ("N", 1), | |
126 F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1), | |
127 n, tmp_data, n, pwr, pwi, dummy, | |
128 idummy, pvr, n, pwork, lwork, info | |
129 F77_CHAR_ARG_LEN (1) | |
130 F77_CHAR_ARG_LEN (1))); | |
131 | |
132 if (info < 0) | |
133 { | |
134 (*current_liboctave_error_handler) ("unrecoverable error in sgeev"); | |
135 return info; | |
136 } | |
137 | |
138 if (info > 0) | |
139 { | |
140 (*current_liboctave_error_handler) ("sgeev failed to converge"); | |
141 return info; | |
142 } | |
143 | |
144 lambda.resize (n); | |
145 v.resize (nvr, nvr); | |
146 | |
147 for (octave_idx_type j = 0; j < n; j++) | |
148 { | |
149 if (wi.elem (j) == 0.0) | |
150 { | |
151 lambda.elem (j) = FloatComplex (wr.elem (j)); | |
152 for (octave_idx_type i = 0; i < nvr; i++) | |
153 v.elem (i, j) = vr.elem (i, j); | |
154 } | |
155 else | |
156 { | |
157 if (j+1 >= n) | |
158 { | |
159 (*current_liboctave_error_handler) ("EIG: internal error"); | |
160 return -1; | |
161 } | |
162 | |
163 lambda.elem(j) = FloatComplex (wr.elem(j), wi.elem(j)); | |
164 lambda.elem(j+1) = FloatComplex (wr.elem(j+1), wi.elem(j+1)); | |
165 | |
166 for (octave_idx_type i = 0; i < nvr; i++) | |
167 { | |
168 float real_part = vr.elem (i, j); | |
169 float imag_part = vr.elem (i, j+1); | |
170 v.elem (i, j) = FloatComplex (real_part, imag_part); | |
171 v.elem (i, j+1) = FloatComplex (real_part, -imag_part); | |
172 } | |
173 j++; | |
174 } | |
175 } | |
176 } | |
177 else | |
178 (*current_liboctave_error_handler) ("sgeev workspace query failed"); | |
179 | |
180 return info; | |
181 } | |
182 | |
183 octave_idx_type | |
184 FloatEIG::symmetric_init (const FloatMatrix& a, bool calc_ev) | |
185 { | |
186 octave_idx_type n = a.rows (); | |
187 | |
188 if (n != a.cols ()) | |
189 { | |
190 (*current_liboctave_error_handler) ("EIG requires square matrix"); | |
191 return -1; | |
192 } | |
193 | |
194 octave_idx_type info = 0; | |
195 | |
196 FloatMatrix atmp = a; | |
197 float *tmp_data = atmp.fortran_vec (); | |
198 | |
199 FloatColumnVector wr (n); | |
200 float *pwr = wr.fortran_vec (); | |
201 | |
202 octave_idx_type lwork = -1; | |
203 float dummy_work; | |
204 | |
205 F77_XFCN (ssyev, SSYEV, (F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1), | |
206 F77_CONST_CHAR_ARG2 ("U", 1), | |
207 n, tmp_data, n, pwr, &dummy_work, lwork, info | |
208 F77_CHAR_ARG_LEN (1) | |
209 F77_CHAR_ARG_LEN (1))); | |
210 | |
211 if (info == 0) | |
212 { | |
213 lwork = static_cast<octave_idx_type> (dummy_work); | |
214 Array<float> work (lwork); | |
215 float *pwork = work.fortran_vec (); | |
216 | |
217 F77_XFCN (ssyev, SSYEV, (F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1), | |
218 F77_CONST_CHAR_ARG2 ("U", 1), | |
219 n, tmp_data, n, pwr, pwork, lwork, info | |
220 F77_CHAR_ARG_LEN (1) | |
221 F77_CHAR_ARG_LEN (1))); | |
222 | |
223 if (info < 0) | |
224 { | |
225 (*current_liboctave_error_handler) ("unrecoverable error in ssyev"); | |
226 return info; | |
227 } | |
228 | |
229 if (info > 0) | |
230 { | |
231 (*current_liboctave_error_handler) ("ssyev failed to converge"); | |
232 return info; | |
233 } | |
234 | |
235 lambda = FloatComplexColumnVector (wr); | |
236 v = calc_ev ? FloatComplexMatrix (atmp) : FloatComplexMatrix (); | |
237 } | |
238 else | |
239 (*current_liboctave_error_handler) ("ssyev workspace query failed"); | |
240 | |
241 return info; | |
242 } | |
243 | |
244 octave_idx_type | |
245 FloatEIG::init (const FloatComplexMatrix& a, bool calc_ev) | |
246 { | |
247 if (a.any_element_is_inf_or_nan ()) | |
248 { | |
249 (*current_liboctave_error_handler) | |
250 ("EIG: matrix contains Inf or NaN values"); | |
251 return -1; | |
252 } | |
253 | |
254 if (a.is_hermitian ()) | |
255 return hermitian_init (a, calc_ev); | |
256 | |
257 octave_idx_type n = a.rows (); | |
258 | |
259 if (n != a.cols ()) | |
260 { | |
261 (*current_liboctave_error_handler) ("EIG requires square matrix"); | |
262 return -1; | |
263 } | |
264 | |
265 octave_idx_type info = 0; | |
266 | |
267 FloatComplexMatrix atmp = a; | |
268 FloatComplex *tmp_data = atmp.fortran_vec (); | |
269 | |
270 FloatComplexColumnVector w (n); | |
271 FloatComplex *pw = w.fortran_vec (); | |
272 | |
273 octave_idx_type nvr = calc_ev ? n : 0; | |
274 FloatComplexMatrix vtmp (nvr, nvr); | |
275 FloatComplex *pv = vtmp.fortran_vec (); | |
276 | |
277 octave_idx_type lwork = -1; | |
278 FloatComplex dummy_work; | |
279 | |
280 octave_idx_type lrwork = 2*n; | |
281 Array<float> rwork (lrwork); | |
282 float *prwork = rwork.fortran_vec (); | |
283 | |
284 FloatComplex *dummy = 0; | |
285 octave_idx_type idummy = 1; | |
286 | |
287 F77_XFCN (cgeev, CGEEV, (F77_CONST_CHAR_ARG2 ("N", 1), | |
288 F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1), | |
289 n, tmp_data, n, pw, dummy, idummy, | |
290 pv, n, &dummy_work, lwork, prwork, info | |
291 F77_CHAR_ARG_LEN (1) | |
292 F77_CHAR_ARG_LEN (1))); | |
293 | |
294 if (info == 0) | |
295 { | |
296 lwork = static_cast<octave_idx_type> (dummy_work.real ()); | |
297 Array<FloatComplex> work (lwork); | |
298 FloatComplex *pwork = work.fortran_vec (); | |
299 | |
300 F77_XFCN (cgeev, CGEEV, (F77_CONST_CHAR_ARG2 ("N", 1), | |
301 F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1), | |
302 n, tmp_data, n, pw, dummy, idummy, | |
303 pv, n, pwork, lwork, prwork, info | |
304 F77_CHAR_ARG_LEN (1) | |
305 F77_CHAR_ARG_LEN (1))); | |
306 | |
307 if (info < 0) | |
308 { | |
309 (*current_liboctave_error_handler) ("unrecoverable error in cgeev"); | |
310 return info; | |
311 } | |
312 | |
313 if (info > 0) | |
314 { | |
315 (*current_liboctave_error_handler) ("cgeev failed to converge"); | |
316 return info; | |
317 } | |
318 | |
319 lambda = w; | |
320 v = vtmp; | |
321 } | |
322 else | |
323 (*current_liboctave_error_handler) ("cgeev workspace query failed"); | |
324 | |
325 return info; | |
326 } | |
327 | |
328 octave_idx_type | |
329 FloatEIG::hermitian_init (const FloatComplexMatrix& a, bool calc_ev) | |
330 { | |
331 octave_idx_type n = a.rows (); | |
332 | |
333 if (n != a.cols ()) | |
334 { | |
335 (*current_liboctave_error_handler) ("EIG requires square matrix"); | |
336 return -1; | |
337 } | |
338 | |
339 octave_idx_type info = 0; | |
340 | |
341 FloatComplexMatrix atmp = a; | |
342 FloatComplex *tmp_data = atmp.fortran_vec (); | |
343 | |
344 FloatColumnVector wr (n); | |
345 float *pwr = wr.fortran_vec (); | |
346 | |
347 octave_idx_type lwork = -1; | |
348 FloatComplex dummy_work; | |
349 | |
350 octave_idx_type lrwork = 3*n; | |
351 Array<float> rwork (lrwork); | |
352 float *prwork = rwork.fortran_vec (); | |
353 | |
354 F77_XFCN (cheev, CHEEV, (F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1), | |
355 F77_CONST_CHAR_ARG2 ("U", 1), | |
356 n, tmp_data, n, pwr, &dummy_work, lwork, | |
357 prwork, info | |
358 F77_CHAR_ARG_LEN (1) | |
359 F77_CHAR_ARG_LEN (1))); | |
360 | |
361 if (info == 0) | |
362 { | |
363 lwork = static_cast<octave_idx_type> (dummy_work.real ()); | |
364 Array<FloatComplex> work (lwork); | |
365 FloatComplex *pwork = work.fortran_vec (); | |
366 | |
367 F77_XFCN (cheev, CHEEV, (F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1), | |
368 F77_CONST_CHAR_ARG2 ("U", 1), | |
369 n, tmp_data, n, pwr, pwork, lwork, prwork, info | |
370 F77_CHAR_ARG_LEN (1) | |
371 F77_CHAR_ARG_LEN (1))); | |
372 | |
373 if (info < 0) | |
374 { | |
375 (*current_liboctave_error_handler) ("unrecoverable error in cheev"); | |
376 return info; | |
377 } | |
378 | |
379 if (info > 0) | |
380 { | |
381 (*current_liboctave_error_handler) ("cheev failed to converge"); | |
382 return info; | |
383 } | |
384 | |
385 lambda = FloatComplexColumnVector (wr); | |
386 v = calc_ev ? FloatComplexMatrix (atmp) : FloatComplexMatrix (); | |
387 } | |
388 else | |
389 (*current_liboctave_error_handler) ("cheev workspace query failed"); | |
390 | |
391 return info; | |
392 } | |
393 | |
394 /* | |
395 ;;; Local Variables: *** | |
396 ;;; mode: C++ *** | |
397 ;;; End: *** | |
398 */ |