comparison liboctave/fEIG.cc @ 7789:82be108cc558

First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author David Bateman <dbateman@free.fr>
date Sun, 27 Apr 2008 22:34:17 +0200
parents
children 18c4ded8612a
comparison
equal deleted inserted replaced
7788:45f5faba05a2 7789:82be108cc558
1 /*
2
3 Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2002, 2003, 2004,
4 2005, 2006, 2007 John W. Eaton
5
6 This file is part of Octave.
7
8 Octave is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 Octave is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <http://www.gnu.org/licenses/>.
21
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27
28 #include "fEIG.h"
29 #include "fColVector.h"
30 #include "f77-fcn.h"
31 #include "lo-error.h"
32
33 extern "C"
34 {
35 F77_RET_T
36 F77_FUNC (sgeev, SGEEV) (F77_CONST_CHAR_ARG_DECL,
37 F77_CONST_CHAR_ARG_DECL,
38 const octave_idx_type&, float*, const octave_idx_type&, float*,
39 float*, float*, const octave_idx_type&, float*,
40 const octave_idx_type&, float*, const octave_idx_type&, octave_idx_type&
41 F77_CHAR_ARG_LEN_DECL
42 F77_CHAR_ARG_LEN_DECL);
43
44 F77_RET_T
45 F77_FUNC (cgeev, CGEEV) (F77_CONST_CHAR_ARG_DECL,
46 F77_CONST_CHAR_ARG_DECL,
47 const octave_idx_type&, FloatComplex*, const octave_idx_type&, FloatComplex*,
48 FloatComplex*, const octave_idx_type&, FloatComplex*, const octave_idx_type&,
49 FloatComplex*, const octave_idx_type&, float*, octave_idx_type&
50 F77_CHAR_ARG_LEN_DECL
51 F77_CHAR_ARG_LEN_DECL);
52
53 F77_RET_T
54 F77_FUNC (ssyev, SSYEV) (F77_CONST_CHAR_ARG_DECL,
55 F77_CONST_CHAR_ARG_DECL,
56 const octave_idx_type&, float*, const octave_idx_type&, float*,
57 float*, const octave_idx_type&, octave_idx_type&
58 F77_CHAR_ARG_LEN_DECL
59 F77_CHAR_ARG_LEN_DECL);
60
61 F77_RET_T
62 F77_FUNC (cheev, CHEEV) (F77_CONST_CHAR_ARG_DECL,
63 F77_CONST_CHAR_ARG_DECL,
64 const octave_idx_type&, FloatComplex*, const octave_idx_type&, float*,
65 FloatComplex*, const octave_idx_type&, float*, octave_idx_type&
66 F77_CHAR_ARG_LEN_DECL
67 F77_CHAR_ARG_LEN_DECL);
68 }
69
70 octave_idx_type
71 FloatEIG::init (const FloatMatrix& a, bool calc_ev)
72 {
73 if (a.any_element_is_inf_or_nan ())
74 {
75 (*current_liboctave_error_handler)
76 ("EIG: matrix contains Inf or NaN values");
77 return -1;
78 }
79
80 if (a.is_symmetric ())
81 return symmetric_init (a, calc_ev);
82
83 octave_idx_type n = a.rows ();
84
85 if (n != a.cols ())
86 {
87 (*current_liboctave_error_handler) ("EIG requires square matrix");
88 return -1;
89 }
90
91 octave_idx_type info = 0;
92
93 FloatMatrix atmp = a;
94 float *tmp_data = atmp.fortran_vec ();
95
96 Array<float> wr (n);
97 float *pwr = wr.fortran_vec ();
98
99 Array<float> wi (n);
100 float *pwi = wi.fortran_vec ();
101
102 volatile octave_idx_type nvr = calc_ev ? n : 0;
103 FloatMatrix vr (nvr, nvr);
104 float *pvr = vr.fortran_vec ();
105
106 octave_idx_type lwork = -1;
107 float dummy_work;
108
109 float *dummy = 0;
110 octave_idx_type idummy = 1;
111
112 F77_XFCN (sgeev, SGEEV, (F77_CONST_CHAR_ARG2 ("N", 1),
113 F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1),
114 n, tmp_data, n, pwr, pwi, dummy,
115 idummy, pvr, n, &dummy_work, lwork, info
116 F77_CHAR_ARG_LEN (1)
117 F77_CHAR_ARG_LEN (1)));
118
119 if (info == 0)
120 {
121 lwork = static_cast<octave_idx_type> (dummy_work);
122 Array<float> work (lwork);
123 float *pwork = work.fortran_vec ();
124
125 F77_XFCN (sgeev, SGEEV, (F77_CONST_CHAR_ARG2 ("N", 1),
126 F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1),
127 n, tmp_data, n, pwr, pwi, dummy,
128 idummy, pvr, n, pwork, lwork, info
129 F77_CHAR_ARG_LEN (1)
130 F77_CHAR_ARG_LEN (1)));
131
132 if (info < 0)
133 {
134 (*current_liboctave_error_handler) ("unrecoverable error in sgeev");
135 return info;
136 }
137
138 if (info > 0)
139 {
140 (*current_liboctave_error_handler) ("sgeev failed to converge");
141 return info;
142 }
143
144 lambda.resize (n);
145 v.resize (nvr, nvr);
146
147 for (octave_idx_type j = 0; j < n; j++)
148 {
149 if (wi.elem (j) == 0.0)
150 {
151 lambda.elem (j) = FloatComplex (wr.elem (j));
152 for (octave_idx_type i = 0; i < nvr; i++)
153 v.elem (i, j) = vr.elem (i, j);
154 }
155 else
156 {
157 if (j+1 >= n)
158 {
159 (*current_liboctave_error_handler) ("EIG: internal error");
160 return -1;
161 }
162
163 lambda.elem(j) = FloatComplex (wr.elem(j), wi.elem(j));
164 lambda.elem(j+1) = FloatComplex (wr.elem(j+1), wi.elem(j+1));
165
166 for (octave_idx_type i = 0; i < nvr; i++)
167 {
168 float real_part = vr.elem (i, j);
169 float imag_part = vr.elem (i, j+1);
170 v.elem (i, j) = FloatComplex (real_part, imag_part);
171 v.elem (i, j+1) = FloatComplex (real_part, -imag_part);
172 }
173 j++;
174 }
175 }
176 }
177 else
178 (*current_liboctave_error_handler) ("sgeev workspace query failed");
179
180 return info;
181 }
182
183 octave_idx_type
184 FloatEIG::symmetric_init (const FloatMatrix& a, bool calc_ev)
185 {
186 octave_idx_type n = a.rows ();
187
188 if (n != a.cols ())
189 {
190 (*current_liboctave_error_handler) ("EIG requires square matrix");
191 return -1;
192 }
193
194 octave_idx_type info = 0;
195
196 FloatMatrix atmp = a;
197 float *tmp_data = atmp.fortran_vec ();
198
199 FloatColumnVector wr (n);
200 float *pwr = wr.fortran_vec ();
201
202 octave_idx_type lwork = -1;
203 float dummy_work;
204
205 F77_XFCN (ssyev, SSYEV, (F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1),
206 F77_CONST_CHAR_ARG2 ("U", 1),
207 n, tmp_data, n, pwr, &dummy_work, lwork, info
208 F77_CHAR_ARG_LEN (1)
209 F77_CHAR_ARG_LEN (1)));
210
211 if (info == 0)
212 {
213 lwork = static_cast<octave_idx_type> (dummy_work);
214 Array<float> work (lwork);
215 float *pwork = work.fortran_vec ();
216
217 F77_XFCN (ssyev, SSYEV, (F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1),
218 F77_CONST_CHAR_ARG2 ("U", 1),
219 n, tmp_data, n, pwr, pwork, lwork, info
220 F77_CHAR_ARG_LEN (1)
221 F77_CHAR_ARG_LEN (1)));
222
223 if (info < 0)
224 {
225 (*current_liboctave_error_handler) ("unrecoverable error in ssyev");
226 return info;
227 }
228
229 if (info > 0)
230 {
231 (*current_liboctave_error_handler) ("ssyev failed to converge");
232 return info;
233 }
234
235 lambda = FloatComplexColumnVector (wr);
236 v = calc_ev ? FloatComplexMatrix (atmp) : FloatComplexMatrix ();
237 }
238 else
239 (*current_liboctave_error_handler) ("ssyev workspace query failed");
240
241 return info;
242 }
243
244 octave_idx_type
245 FloatEIG::init (const FloatComplexMatrix& a, bool calc_ev)
246 {
247 if (a.any_element_is_inf_or_nan ())
248 {
249 (*current_liboctave_error_handler)
250 ("EIG: matrix contains Inf or NaN values");
251 return -1;
252 }
253
254 if (a.is_hermitian ())
255 return hermitian_init (a, calc_ev);
256
257 octave_idx_type n = a.rows ();
258
259 if (n != a.cols ())
260 {
261 (*current_liboctave_error_handler) ("EIG requires square matrix");
262 return -1;
263 }
264
265 octave_idx_type info = 0;
266
267 FloatComplexMatrix atmp = a;
268 FloatComplex *tmp_data = atmp.fortran_vec ();
269
270 FloatComplexColumnVector w (n);
271 FloatComplex *pw = w.fortran_vec ();
272
273 octave_idx_type nvr = calc_ev ? n : 0;
274 FloatComplexMatrix vtmp (nvr, nvr);
275 FloatComplex *pv = vtmp.fortran_vec ();
276
277 octave_idx_type lwork = -1;
278 FloatComplex dummy_work;
279
280 octave_idx_type lrwork = 2*n;
281 Array<float> rwork (lrwork);
282 float *prwork = rwork.fortran_vec ();
283
284 FloatComplex *dummy = 0;
285 octave_idx_type idummy = 1;
286
287 F77_XFCN (cgeev, CGEEV, (F77_CONST_CHAR_ARG2 ("N", 1),
288 F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1),
289 n, tmp_data, n, pw, dummy, idummy,
290 pv, n, &dummy_work, lwork, prwork, info
291 F77_CHAR_ARG_LEN (1)
292 F77_CHAR_ARG_LEN (1)));
293
294 if (info == 0)
295 {
296 lwork = static_cast<octave_idx_type> (dummy_work.real ());
297 Array<FloatComplex> work (lwork);
298 FloatComplex *pwork = work.fortran_vec ();
299
300 F77_XFCN (cgeev, CGEEV, (F77_CONST_CHAR_ARG2 ("N", 1),
301 F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1),
302 n, tmp_data, n, pw, dummy, idummy,
303 pv, n, pwork, lwork, prwork, info
304 F77_CHAR_ARG_LEN (1)
305 F77_CHAR_ARG_LEN (1)));
306
307 if (info < 0)
308 {
309 (*current_liboctave_error_handler) ("unrecoverable error in cgeev");
310 return info;
311 }
312
313 if (info > 0)
314 {
315 (*current_liboctave_error_handler) ("cgeev failed to converge");
316 return info;
317 }
318
319 lambda = w;
320 v = vtmp;
321 }
322 else
323 (*current_liboctave_error_handler) ("cgeev workspace query failed");
324
325 return info;
326 }
327
328 octave_idx_type
329 FloatEIG::hermitian_init (const FloatComplexMatrix& a, bool calc_ev)
330 {
331 octave_idx_type n = a.rows ();
332
333 if (n != a.cols ())
334 {
335 (*current_liboctave_error_handler) ("EIG requires square matrix");
336 return -1;
337 }
338
339 octave_idx_type info = 0;
340
341 FloatComplexMatrix atmp = a;
342 FloatComplex *tmp_data = atmp.fortran_vec ();
343
344 FloatColumnVector wr (n);
345 float *pwr = wr.fortran_vec ();
346
347 octave_idx_type lwork = -1;
348 FloatComplex dummy_work;
349
350 octave_idx_type lrwork = 3*n;
351 Array<float> rwork (lrwork);
352 float *prwork = rwork.fortran_vec ();
353
354 F77_XFCN (cheev, CHEEV, (F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1),
355 F77_CONST_CHAR_ARG2 ("U", 1),
356 n, tmp_data, n, pwr, &dummy_work, lwork,
357 prwork, info
358 F77_CHAR_ARG_LEN (1)
359 F77_CHAR_ARG_LEN (1)));
360
361 if (info == 0)
362 {
363 lwork = static_cast<octave_idx_type> (dummy_work.real ());
364 Array<FloatComplex> work (lwork);
365 FloatComplex *pwork = work.fortran_vec ();
366
367 F77_XFCN (cheev, CHEEV, (F77_CONST_CHAR_ARG2 (calc_ev ? "V" : "N", 1),
368 F77_CONST_CHAR_ARG2 ("U", 1),
369 n, tmp_data, n, pwr, pwork, lwork, prwork, info
370 F77_CHAR_ARG_LEN (1)
371 F77_CHAR_ARG_LEN (1)));
372
373 if (info < 0)
374 {
375 (*current_liboctave_error_handler) ("unrecoverable error in cheev");
376 return info;
377 }
378
379 if (info > 0)
380 {
381 (*current_liboctave_error_handler) ("cheev failed to converge");
382 return info;
383 }
384
385 lambda = FloatComplexColumnVector (wr);
386 v = calc_ev ? FloatComplexMatrix (atmp) : FloatComplexMatrix ();
387 }
388 else
389 (*current_liboctave_error_handler) ("cheev workspace query failed");
390
391 return info;
392 }
393
394 /*
395 ;;; Local Variables: ***
396 ;;; mode: C++ ***
397 ;;; End: ***
398 */