comparison src/DLD-FUNCTIONS/balance.cc @ 7789:82be108cc558

First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author David Bateman <dbateman@free.fr>
date Sun, 27 Apr 2008 22:34:17 +0200
parents 29980c6b8604
children a5e080076778
comparison
equal deleted inserted replaced
7788:45f5faba05a2 7789:82be108cc558
28 #endif 28 #endif
29 29
30 #include <string> 30 #include <string>
31 31
32 #include "CmplxAEPBAL.h" 32 #include "CmplxAEPBAL.h"
33 #include "CmplxAEPBAL.h" 33 #include "fCmplxAEPBAL.h"
34 #include "dbleAEPBAL.h" 34 #include "dbleAEPBAL.h"
35 #include "dbleAEPBAL.h" 35 #include "floatAEPBAL.h"
36 #include "CmplxGEPBAL.h"
37 #include "fCmplxGEPBAL.h"
38 #include "dbleGEPBAL.h"
39 #include "floatGEPBAL.h"
36 #include "quit.h" 40 #include "quit.h"
37 41
38 #include "defun-dld.h" 42 #include "defun-dld.h"
39 #include "error.h" 43 #include "error.h"
40 #include "f77-fcn.h" 44 #include "f77-fcn.h"
41 #include "gripes.h" 45 #include "gripes.h"
42 #include "oct-obj.h" 46 #include "oct-obj.h"
43 #include "utils.h" 47 #include "utils.h"
44 48
45 extern "C"
46 {
47 F77_RET_T
48 F77_FUNC (dggbal, DGGBAL) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type& N,
49 double* A, const octave_idx_type& LDA, double* B,
50 const octave_idx_type& LDB, octave_idx_type& ILO, octave_idx_type& IHI,
51 double* LSCALE, double* RSCALE,
52 double* WORK, octave_idx_type& INFO
53 F77_CHAR_ARG_LEN_DECL);
54
55 F77_RET_T
56 F77_FUNC (dggbak, DGGBAK) (F77_CONST_CHAR_ARG_DECL,
57 F77_CONST_CHAR_ARG_DECL,
58 const octave_idx_type& N, const octave_idx_type& ILO,
59 const octave_idx_type& IHI, const double* LSCALE,
60 const double* RSCALE, octave_idx_type& M, double* V,
61 const octave_idx_type& LDV, octave_idx_type& INFO
62 F77_CHAR_ARG_LEN_DECL
63 F77_CHAR_ARG_LEN_DECL);
64
65 F77_RET_T
66 F77_FUNC (zggbal, ZGGBAL) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type& N,
67 Complex* A, const octave_idx_type& LDA, Complex* B,
68 const octave_idx_type& LDB, octave_idx_type& ILO, octave_idx_type& IHI,
69 double* LSCALE, double* RSCALE,
70 double* WORK, octave_idx_type& INFO
71 F77_CHAR_ARG_LEN_DECL);
72 }
73
74 DEFUN_DLD (balance, args, nargout, 49 DEFUN_DLD (balance, args, nargout,
75 "-*- texinfo -*-\n\ 50 "-*- texinfo -*-\n\
76 @deftypefn {Loadable Function} {@var{aa} =} balance (@var{a}, @var{opt})\n\ 51 @deftypefn {Loadable Function} {@var{aa} =} balance (@var{a}, @var{opt})\n\
77 @deftypefnx {Loadable Function} {[@var{dd}, @var{aa}] =} balance (@var{a}, @var{opt})\n\ 52 @deftypefnx {Loadable Function} {[@var{dd}, @var{aa}] =} balance (@var{a}, @var{opt})\n\
78 @deftypefnx {Loadable Function} {[@var{cc}, @var{dd}, @var{aa}, @var{bb}] =} balance (@var{a}, @var{b}, @var{opt})\n\ 53 @deftypefnx {Loadable Function} {[@var{cc}, @var{dd}, @var{aa}, @var{bb}] =} balance (@var{a}, @var{b}, @var{opt})\n\
143 { 118 {
144 gripe_square_matrix_required ("balance"); 119 gripe_square_matrix_required ("balance");
145 return retval; 120 return retval;
146 } 121 }
147 122
123 bool isfloat = args(0).is_single_type () ||
124 (! AEPcase && args(1).is_single_type());
125
126 bool complex_case = (args(0).is_complex_type () ||
127 (! AEPcase && args(1).is_complex_type ()));
128
148 // Extract argument 1 parameter for both AEP and GEP. 129 // Extract argument 1 parameter for both AEP and GEP.
149 Matrix aa; 130 Matrix aa;
150 ComplexMatrix caa; 131 ComplexMatrix caa;
151 132 FloatMatrix faa;
152 if (args(0).is_complex_type ()) 133 FloatComplexMatrix fcaa;
153 caa = args(0).complex_matrix_value (); 134
135 if (isfloat)
136 {
137 if (complex_case)
138 fcaa = args(0).float_complex_matrix_value ();
139 else
140 faa = args(0).float_matrix_value ();
141 }
154 else 142 else
155 aa = args(0).matrix_value (); 143 {
144 if (complex_case)
145 caa = args(0).complex_matrix_value ();
146 else
147 aa = args(0).matrix_value ();
148 }
156 149
157 if (error_state) 150 if (error_state)
158 return retval; 151 return retval;
159 152
160 // Treat AEP/GEP cases. 153 // Treat AEP/GEP cases.
171 error ("balance: AEP argument 2 must be a string"); 164 error ("balance: AEP argument 2 must be a string");
172 return retval; 165 return retval;
173 } 166 }
174 167
175 // balance the AEP 168 // balance the AEP
176 if (args(0).is_complex_type ()) 169 if (isfloat)
177 { 170 {
178 ComplexAEPBALANCE result (caa, bal_job); 171 if (complex_case)
179 172 {
180 if (nargout == 0 || nargout == 1) 173 FloatComplexAEPBALANCE result (fcaa, bal_job);
181 retval(0) = result.balanced_matrix (); 174
182 else 175 if (nargout == 0 || nargout == 1)
183 { 176 retval(0) = result.balanced_matrix ();
184 retval(1) = result.balanced_matrix (); 177 else
185 retval(0) = result.balancing_matrix (); 178 {
186 } 179 retval(1) = result.balanced_matrix ();
187 } 180 retval(0) = result.balancing_matrix ();
188 else 181 }
189 { 182 }
190 AEPBALANCE result (aa, bal_job); 183 else
191 184 {
192 if (nargout == 0 || nargout == 1) 185 FloatAEPBALANCE result (faa, bal_job);
193 retval(0) = result.balanced_matrix (); 186
194 else 187 if (nargout == 0 || nargout == 1)
195 { 188 retval(0) = result.balanced_matrix ();
196 retval(1) = result.balanced_matrix (); 189 else
197 retval(0) = result.balancing_matrix (); 190 {
191 retval(1) = result.balanced_matrix ();
192 retval(0) = result.balancing_matrix ();
193 }
194 }
195 }
196 else
197 {
198 if (complex_case)
199 {
200 ComplexAEPBALANCE result (caa, bal_job);
201
202 if (nargout == 0 || nargout == 1)
203 retval(0) = result.balanced_matrix ();
204 else
205 {
206 retval(1) = result.balanced_matrix ();
207 retval(0) = result.balancing_matrix ();
208 }
209 }
210 else
211 {
212 AEPBALANCE result (aa, bal_job);
213
214 if (nargout == 0 || nargout == 1)
215 retval(0) = result.balanced_matrix ();
216 else
217 {
218 retval(1) = result.balanced_matrix ();
219 retval(0) = result.balancing_matrix ();
220 }
198 } 221 }
199 } 222 }
200 } 223 }
201 else 224 else
202 { 225 {
226 if (nargout == 1)
227 warning ("balance: used GEP, should have two output arguments");
228
203 // Generalized eigenvalue problem. 229 // Generalized eigenvalue problem.
204 if (nargin == 2) 230 if (nargin == 2)
205 bal_job = "B"; 231 bal_job = "B";
206 else if (args(2).is_string ()) 232 else if (args(2).is_string ())
207 bal_job = args(2).string_value (); 233 bal_job = args(2).string_value ();
217 return retval; 243 return retval;
218 } 244 }
219 245
220 Matrix bb; 246 Matrix bb;
221 ComplexMatrix cbb; 247 ComplexMatrix cbb;
222 248 FloatMatrix fbb;
223 if (args(1).is_complex_type ()) 249 FloatComplexMatrix fcbb;
224 cbb = args(1).complex_matrix_value (); 250
225 else 251 if (isfloat)
226 bb = args(1).matrix_value (); 252 {
227 253 if (complex_case)
228 if (error_state) 254 fcbb = args(1).float_complex_matrix_value ();
229 return retval; 255 else
230 256 fbb = args(1).float_matrix_value ();
231 // Both matrices loaded, now let's check what kind of arithmetic: 257 }
232 // first, declare variables used in both the real and complex case 258 else
233 259 {
234 octave_idx_type ilo, ihi, info; 260 if (complex_case)
235 RowVector lscale(nn), rscale(nn), work(6*nn); 261 cbb = args(1).complex_matrix_value ();
236 char job = bal_job[0]; 262 else
237 263 bb = args(1).matrix_value ();
238 static octave_idx_type complex_case 264 }
239 = (args(0).is_complex_type () || args(1).is_complex_type ()); 265
240 266 // balance the GEP
241 // now balance 267 if (isfloat)
242 if (complex_case) 268 {
243 { 269 if (complex_case)
244 if (args(0).is_real_type ()) 270 {
245 caa = ComplexMatrix (aa); 271 FloatComplexGEPBALANCE result (fcaa, fcbb, bal_job);
246 272
247 if (args(1).is_real_type ()) 273 switch (nargout)
248 cbb = ComplexMatrix (bb); 274 {
249 275 case 4:
250 F77_XFCN (zggbal, ZGGBAL, 276 retval(3) = result.balanced_matrix2 ();
251 (F77_CONST_CHAR_ARG2 (&job, 1), 277 // fall through
252 nn, caa.fortran_vec (), nn, cbb.fortran_vec (), 278 case 3:
253 nn, ilo, ihi, lscale.fortran_vec (), 279 retval(2) = result.balanced_matrix ();
254 rscale.fortran_vec (), work.fortran_vec (), info 280 retval(1) = result.balancing_matrix2 ();
255 F77_CHAR_ARG_LEN (1))); 281 retval(0) = result.balancing_matrix ();
256 } 282 break;
257 else 283 case 2:
258 { 284 retval(1) = result.balancing_matrix2 ();
259 // real matrices case 285 // fall through
260 286 case 1:
261 F77_XFCN (dggbal, DGGBAL, 287 retval(0) = result.balancing_matrix ();
262 (F77_CONST_CHAR_ARG2 (&job, 1), 288 break;
263 nn, aa.fortran_vec (), nn, bb.fortran_vec (), 289 default:
264 nn, ilo, ihi, lscale.fortran_vec (), 290 error ("balance: invalid number of output arguments");
265 rscale.fortran_vec (), work.fortran_vec (), info 291 break;
266 F77_CHAR_ARG_LEN (1))); 292 }
267 } 293 }
268 294 else
269 // Since we just want the balancing matrices, we can use dggbal 295 {
270 // for both the real and complex cases. 296 FloatGEPBALANCE result (faa, fbb, bal_job);
271 297
272 Matrix Pl(nn,nn), Pr(nn,nn); 298 switch (nargout)
273 299 {
274 for (octave_idx_type ii = 0; ii < nn; ii++) 300 case 4:
275 for (octave_idx_type jj = 0; jj < nn; jj++) 301 retval(3) = result.balanced_matrix2 ();
276 { 302 // fall through
277 OCTAVE_QUIT; 303 case 3:
278 304 retval(2) = result.balanced_matrix ();
279 Pl(ii,jj) = Pr(ii,jj) = (ii == jj ? 1.0 : 0.0); 305 retval(1) = result.balancing_matrix2 ();
280 } 306 retval(0) = result.balancing_matrix ();
281 307 break;
282 // left first 308 case 2:
283 F77_XFCN (dggbak, DGGBAK, 309 retval(1) = result.balancing_matrix2 ();
284 (F77_CONST_CHAR_ARG2 (&job, 1), 310 // fall through
285 F77_CONST_CHAR_ARG2 ("L", 1), 311 case 1:
286 nn, ilo, ihi, lscale.data (), rscale.data (), 312 retval(0) = result.balancing_matrix ();
287 nn, Pl.fortran_vec (), nn, info 313 break;
288 F77_CHAR_ARG_LEN (1) 314 default:
289 F77_CHAR_ARG_LEN (1))); 315 error ("balance: invalid number of output arguments");
290 316 break;
291 // then right 317 }
292 F77_XFCN (dggbak, DGGBAK, 318 }
293 (F77_CONST_CHAR_ARG2 (&job, 1), 319 }
294 F77_CONST_CHAR_ARG2 ("R", 1), 320 else
295 nn, ilo, ihi, lscale.data (), rscale.data (), 321 {
296 nn, Pr.fortran_vec (), nn, info 322 if (complex_case)
297 F77_CHAR_ARG_LEN (1) 323 {
298 F77_CHAR_ARG_LEN (1))); 324 ComplexGEPBALANCE result (caa, cbb, bal_job);
299 325
300 switch (nargout) 326 switch (nargout)
301 { 327 {
302 case 0: 328 case 4:
303 case 1: 329 retval(3) = result.balanced_matrix2 ();
304 warning ("balance: used GEP, should have two output arguments"); 330 // fall through
305 if (complex_case) 331 case 3:
306 retval(0) = caa; 332 retval(2) = result.balanced_matrix ();
307 else 333 retval(1) = result.balancing_matrix2 ();
308 retval(0) = aa; 334 retval(0) = result.balancing_matrix ();
309 break; 335 break;
310 336 case 2:
311 case 2: 337 retval(1) = result.balancing_matrix2 ();
312 if (complex_case) 338 // fall through
313 { 339 case 1:
314 retval(1) = cbb; 340 retval(0) = result.balancing_matrix ();
315 retval(0) = caa; 341 break;
316 } 342 default:
317 else 343 error ("balance: invalid number of output arguments");
318 { 344 break;
319 retval(1) = bb; 345 }
320 retval(0) = aa; 346 }
321 } 347 else
322 break; 348 {
323 349 GEPBALANCE result (aa, bb, bal_job);
324 case 4: 350
325 if (complex_case) 351 switch (nargout)
326 { 352 {
327 retval(3) = cbb; 353 case 4:
328 retval(2) = caa; 354 retval(3) = result.balanced_matrix2 ();
329 } 355 // fall through
330 else 356 case 3:
331 { 357 retval(2) = result.balanced_matrix ();
332 retval(3) = bb; 358 retval(1) = result.balancing_matrix2 ();
333 retval(2) = aa; 359 retval(0) = result.balancing_matrix ();
334 } 360 break;
335 retval(1) = Pr; 361 case 2:
336 retval(0) = Pl.transpose (); // so that aa_bal = cc*aa*dd, etc. 362 retval(1) = result.balancing_matrix2 ();
337 break; 363 // fall through
338 364 case 1:
339 default: 365 retval(0) = result.balancing_matrix ();
340 error ("balance: invalid number of output arguments"); 366 break;
341 break; 367 default:
368 error ("balance: invalid number of output arguments");
369 break;
370 }
371 }
342 } 372 }
343 } 373 }
344 374
345 return retval; 375 return retval;
346 } 376 }