5164
|
1 /* ========================================================================== */ |
|
2 /* === umf_config.h ========================================================= */ |
|
3 /* ========================================================================== */ |
|
4 |
|
5 /* -------------------------------------------------------------------------- */ |
|
6 /* UMFPACK Version 4.4, Copyright (c) 2005 by Timothy A. Davis. CISE Dept, */ |
|
7 /* Univ. of Florida. All Rights Reserved. See ../Doc/License for License. */ |
|
8 /* web: http://www.cise.ufl.edu/research/sparse/umfpack */ |
|
9 /* -------------------------------------------------------------------------- */ |
|
10 |
|
11 /* |
|
12 This file controls the compile-time configuration of UMFPACK. Modify the |
|
13 Makefile, the architecture-dependent Make.* file, and this file if |
|
14 necessary, to control these options. The following flags may be given |
|
15 as options to your C compiler (as in "cc -DNBLAS", for example). These |
|
16 flags are normally placed in your CONFIG string, defined in your Make.*. |
|
17 |
|
18 All of these options, except for the timer, are for accessing the BLAS. |
|
19 |
|
20 -DNBLAS |
|
21 |
|
22 BLAS mode. If -DNBLAS is set, then no BLAS will be used. Vanilla |
|
23 C code will be used instead. This is portable, and easier to |
|
24 install, but you won't get the best performance. |
|
25 |
|
26 If -DNBLAS is not set, then externally-available BLAS routines |
|
27 (dgemm, dger, and dgemv or the equivalent C-BLAS routines) will be |
|
28 used. This will give you the best performance, but perhaps at the |
|
29 expense of portability. |
|
30 |
|
31 The default is to use the BLAS, for both the C-callable libumfpack.a |
|
32 library and the MATLAB mexFunction. If you have trouble installing |
|
33 UMFPACK, set -DNBLAS (but then UMFPACK will be slow). |
|
34 |
|
35 -DCBLAS |
|
36 |
|
37 If -DCBLAS is set, then the C-BLAS interface to the BLAS is |
|
38 used. If your vendor-supplied BLAS library does not have a C-BLAS |
|
39 interface, you can obtain the ATLAS BLAS, available at |
|
40 http://www.netlib.org/atlas. |
|
41 |
|
42 This flag is ignored if -DNBLAS is set. |
|
43 |
|
44 -DLP64 |
|
45 |
|
46 This should be defined if you are compiling in the LP64 model |
|
47 (32 bit int's, 64 bit long's, and 64 bit pointers). In Solaris, |
|
48 this is obtained with the flags -xtarget=ultra -xarch=v9 for |
|
49 the cc compiler (for example). |
|
50 |
|
51 -DLONGBLAS |
|
52 |
|
53 If not defined, then the BLAS are not called in the long integer |
|
54 version of UMFPACK (the umfpack_*l_* routines). The most common |
|
55 definitions of the BLAS, unfortunately, use int arguments, and |
|
56 are thus not suitable for use in the LP64 model. Only the Sun |
|
57 Performance Library, as far as I can tell, has a version of the |
|
58 BLAS that allows long integer (64-bit) input arguments. This |
|
59 flag is set automatically in Sun Solaris if you are using the |
|
60 Sun Performance BLAS. You can set it yourself, too, if your BLAS |
|
61 routines can take long integer input arguments. |
|
62 |
|
63 -DNSUNPERF |
|
64 |
|
65 Applies only to Sun Solaris. If -DNSUNPERF is set, then the Sun |
|
66 Performance Library BLAS will not be used. |
|
67 |
|
68 The Sun Performance Library BLAS is used by default when compiling |
|
69 the C-callable libumfpack.a library on Sun Solaris. |
|
70 |
|
71 This flag is ignored if -DNBLAS is set. |
|
72 |
|
73 -DNSCSL |
|
74 |
|
75 Applies only to SGI IRIX. If -DNSCSL is set, then the SGI SCSL |
|
76 Scientific Library BLAS will not be used. |
|
77 |
|
78 The SGI SCSL Scientific Library BLAS is used by default when |
|
79 compiling the C-callable libumfpack.a library on SGI IRIX. |
|
80 |
|
81 This flag is ignored if -DNBLAS is set. |
|
82 |
|
83 -DNPOSIX |
|
84 |
|
85 If -DNPOSIX is set, then your Unix operating system is not POSIX- |
|
86 compliant, and the POSIX sysconf ( ) and times ( ) |
|
87 routines are not used. These routines provide CPU time and |
|
88 wallclock time information. If -DNPOSIX is set, then the ANSI |
|
89 C clock ( ) routine is used. If -DNPOSIX is not set, then |
|
90 sysconf ( ) and times ( ) are used in umfpack_tic and umfpack_toc. |
|
91 See umfpack_tictoc.c for more information. |
|
92 The default is to use the POSIX routines, except for Windows, |
|
93 which is not POSIX-compliant. |
|
94 |
|
95 -DGETRUSAGE |
|
96 |
|
97 If -DGETRUSAGE is set, then your system's getrusage ( ) routine |
|
98 will be used for getting the process CPU time. Otherwise the ANSI |
|
99 C clock ( ) routine will be used. The default is to use getrusage |
|
100 ( ) on Unix systems, and to use clock on all other architectures. |
|
101 |
|
102 -DNO_TIMER |
|
103 |
|
104 If -DNO_TIMER is set, then no timing routines are used at all. |
|
105 |
|
106 -DNUTIL |
|
107 |
|
108 If -DNUTIL is set, then the internal MATLAB utMalloc, utFree, and |
|
109 utRealloc routines are not used in the UMFPACK mexFunction. The |
|
110 regular mxMalloc, mxFree, and mxRealloc routines are used instead. |
|
111 These routines are not documented, but are available for use. For |
|
112 Windows, -DNUTIL is defined below, because access to the ut* |
|
113 routines is not available by default. |
|
114 |
|
115 -DNRECIPROCAL |
|
116 |
|
117 This option controls a tradeoff between speed and accuracy. Using |
|
118 -DNRECIPROCAL can lead to more accurate results, but with perhaps |
|
119 some cost in performance, particularly if floating-point division |
|
120 is much more costly than floating-point multiplication. |
|
121 |
|
122 This option determines the method used to scale the pivot column. |
|
123 If set, or if the absolute value of the pivot is < 1e-12 (or is a |
|
124 NaN), then the pivot column is divided by the pivot value. |
|
125 Otherwise, the reciprocal of the pivot value is computed, and the |
|
126 pivot column is multiplied by (1/pivot). Multiplying by the |
|
127 reciprocal can be slightly less accurate than dividing by the |
|
128 pivot, but it is often faster. See umf_scale.c. |
|
129 |
|
130 This has a small effect on the performance of UMFPACK, at least on |
|
131 a Pentium 4M. It may have a larger effect on other architectures |
|
132 where floating-point division is much more costly than floating- |
|
133 point multiplication. The RS 6000 is one such example. |
|
134 |
|
135 By default, the method chosen is to multiply by the reciprocal |
|
136 (sacrificing accuracy for speed), except when compiling UMFPACK |
|
137 as a built-in routine in MATLAB, or when gcc is being used. |
|
138 |
|
139 When MATHWORKS is defined, -DNRECIPROCAL is forced on, and the pivot |
|
140 column is divided by the pivot value. The only way of using the |
|
141 other method in this case is to edit this file. |
|
142 |
|
143 If -DNRECIPROCAL is enabled, then the row scaling factors are always |
|
144 applied by dividing each row by the scale factor, rather than |
|
145 multiplying by the reciprocal. If -DNRECIPROCAL is not enabled |
|
146 (the default case), then the scale factors are normally applied by |
|
147 multiplying by the reciprocal. If, however, the smallest scale |
|
148 factor is tiny, then the scale factors are applied via division. |
|
149 |
|
150 -DNO_DIVIDE_BY_ZERO |
|
151 |
|
152 If the pivot is zero, and this flag is set, then no divide-by-zero |
|
153 occurs. |
|
154 |
|
155 You should normally not set these flags yourself: |
|
156 |
|
157 -DBLAS_BY_VALUE if scalars are passed by value, not reference |
|
158 -DBLAS_NO_UNDERSCORE if no underscore should be appended |
|
159 -DBLAS_CHAR_ARG if BLAS options are single char's, not strings |
|
160 |
|
161 The BLAS options are normally set automatically. If your |
|
162 architecture cannot be determined (see UMFPACK_ARCHITECTURE, below) |
|
163 then you may need to set these flags yourself. |
|
164 |
|
165 The following options are controlled by amd_internal.h: |
|
166 |
|
167 -DMATLAB_MEX_FILE |
|
168 |
|
169 This flag is turned on when compiling the umfpack mexFunction for |
|
170 use in MATLAB. When compiling the MATLAB mexFunction, the MATLAB |
|
171 BLAS are used (unless -DNBLAS is set). The -DCBLAS, -DNSCSL, and |
|
172 -DNSUNPERF flags are all ignored. The -DNRECIPROCAL flag is |
|
173 forced on. Otherwise, [L,U,P,Q,R] = umfpack (A) would return |
|
174 either L*U = P*(R\A)*Q or L*U = P*R*A*Q. Rather than returning a |
|
175 flag stating how the scale factors R are to be applied, the umfpack |
|
176 mexFunction always takes the more accurate route and returns |
|
177 L*U = P*(R\A)*Q. |
|
178 |
|
179 -DMATHWORKS |
|
180 |
|
181 This flag is turned on when compiling umfpack as a built-in routine |
|
182 in MATLAB. The MATLAB BLAS are used for all architectures (-DNBLAS, |
|
183 -DCBLAS, -DNSCSL, and -DNSUNPERF flags are all ignored). Internal |
|
184 routines utMalloc, utFree, utRealloc, utPrintf, utDivideComplex, |
|
185 and utFdlibm_hypot are used, and the "util.h" file is included. |
|
186 This avoids the problem discussed in the User Guide regarding memory |
|
187 allocation in MATLAB. utMalloc returns NULL on failure, instead of |
|
188 terminating the mexFunction (which is what mxMalloc does). However, |
|
189 the ut* routines are not documented by The MathWorks, Inc., so I |
|
190 cannot guarantee that you will always be able to use them. |
|
191 The -DNRECIPROCAL flag is turned on. |
|
192 |
|
193 -DNDEBUG |
|
194 |
|
195 Debugging mode (if NDEBUG is not defined). The default, of course, |
|
196 is no debugging. Turning on debugging takes some work (see below). |
|
197 If you do not edit this file, then debugging is turned off anyway, |
|
198 regardless of whether or not -DNDEBUG is specified in your compiler |
|
199 options. |
|
200 */ |
|
201 |
|
202 /* ========================================================================== */ |
|
203 /* === AMD configuration ==================================================== */ |
|
204 /* ========================================================================== */ |
|
205 |
|
206 /* NDEBUG, PRINTF defined in amd_internal.h */ |
|
207 |
|
208 /* ========================================================================== */ |
|
209 /* === reciprocal option ==================================================== */ |
|
210 /* ========================================================================== */ |
|
211 |
|
/* Force the definition of NRECIPROCAL when UMFPACK is built for MATLAB,
 * either as a built-in routine (MATHWORKS) or as a mexFunction
 * (MATLAB_MEX_FILE).  Do not multiply by the reciprocal in those cases.
 * A definition already given on the compiler command line is left as is. */

#if !defined (NRECIPROCAL) \
    && (defined (MATHWORKS) || defined (MATLAB_MEX_FILE))
#define NRECIPROCAL
#endif
|
220 |
|
221 /* ========================================================================== */ |
|
222 /* === Microsoft Windows configuration ====================================== */ |
|
223 /* ========================================================================== */ |
|
224 |
|
#ifdef UMF_WINDOWS
/* Windows can't access the ut* routines, and it isn't Unix.  NUTIL selects */
/* the regular mx* allocation routines in the mexFunction; NPOSIX selects */
/* the ANSI C clock ( ) routine instead of sysconf ( ) and times ( ). */
/* Each flag is defined only if it is not set already, so that passing */
/* -DNUTIL or -DNPOSIX to the compiler as well does not redefine the macro. */
#ifndef NUTIL
#define NUTIL
#endif
#ifndef NPOSIX
#define NPOSIX
#endif
#endif
|
230 |
|
231 /* ========================================================================== */ |
|
232 /* === 0-based or 1-based printing ========================================== */ |
|
233 /* ========================================================================== */ |
|
234 |
|
/* INDEX (i) maps an internal 0-based row or column index to the value that */
/* is printed for the user. */
#if defined (MATLAB_MEX_FILE) && defined (NDEBUG)
/* In MATLAB, matrices are 1-based to the user, but 0-based internally. */
/* One is added to all row and column indices when printing matrices */
/* for the MATLAB user.  The +1 shift is turned off when debugging. */
#define INDEX(i) ((i)+1)
#else
/* In ANSI C, matrices are 0-based and indices are reported as such. */
/* This mode is also used for debug mode, and if MATHWORKS is defined rather */
/* than MATLAB_MEX_FILE. */
#define INDEX(i) (i)
#endif
|
246 |
|
247 /* ========================================================================== */ |
|
248 /* === Timer ================================================================ */ |
|
249 /* ========================================================================== */ |
|
250 |
|
251 /* |
|
252 If you have the getrusage routine (all Unix systems I've tested do), then use |
|
253 that. Otherwise, use the ANSI C clock function. Note that on many |
|
254 systems, the ANSI clock function wraps around after only 2147 seconds, or |
|
255 about 36 minutes. BE CAREFUL: if you compare the run time of UMFPACK with |
|
256 other sparse matrix packages, be sure to use the same timer. See |
|
257 umfpack_tictoc.c for the timer used internally by UMFPACK. See also |
|
258 umfpack_timer.c for the timer used in an earlier version of UMFPACK. |
|
259 That timer is still available as a user-callable routine, but it is no |
|
260 longer used internally by UMFPACK. |
|
261 */ |
|
262 |
|
/* Sun Solaris, SGI Irix, Linux, Compaq Alpha, and IBM RS 6000 all have */
/* getrusage.  It's in BSD unix, so perhaps all unix systems have it. */
/* GETRUSAGE is also a user-settable flag (-DGETRUSAGE), so it is defined */
/* here only when it has not been set already; this avoids redefining a */
/* macro that was given a value on the compiler command line. */
#if !defined (GETRUSAGE) \
    && (defined (UMF_SOL2) || defined (UMF_SGI) || defined (UMF_LINUX) \
    || defined (UMF_ALPHA) || defined (UMF_AIX))
#define GETRUSAGE
#endif
|
269 |
|
270 |
|
271 /* ========================================================================== */ |
|
272 /* === BLAS ================================================================= */ |
|
273 /* ========================================================================== */ |
|
274 |
|
275 /* |
|
276 The adventure begins. Figure out how to call the BLAS ... |
|
277 |
|
278 This works, but it is incredibly ugly. The C-BLAS was supposed to solve |
|
279 this problem, and make it easier to interface a C program to the BLAS. |
|
280 Unfortunately, the C-BLAS does not have a "long" integer (64 bit) version. |
|
281 Various vendors have done their own 64-bit BLAS. Sun has dgemm_64 routines |
|
282 with "long" integers, SGI has a 64-bit dgemm in their scsl_blas_i8 library |
|
283 with "long long" integers, and so on. |
|
284 |
|
285 Different vendors also have different ways of defining a complex number, |
|
286 some using struct's. That's a bad idea. See umf_version.h for the better |
|
287 way to do it (the method that was also chosen for the complex C-BLAS, |
|
288 which is compatible and guaranteed to be portable with ANSI C). |
|
289 |
|
290 To make matters worse, SGI's SCSL BLAS has a C-BLAS interface which |
|
291 differs from the ATLAS C-BLAS interface (see immediately below); |
|
292 although a more recent version of SGI's C-BLAS interface is correct |
|
293 if SCSL_VOID_ARGS is defined. |
|
294 */ |
|
295 |
|
296 |
|
297 /* -------------------------------------------------------------------------- */ |
|
298 /* Determine which BLAS to use. */ |
|
299 /* -------------------------------------------------------------------------- */ |
|
300 |
|
/* Exactly one USE_*_BLAS macro is defined by this chain.  The conditions */
/* are tested in priority order and the first one that holds wins. */

#if defined (MATHWORKS)
/* built-in MATLAB routine: MATLAB BLAS, tested before (and overriding) NBLAS */
#define USE_MATLAB_BLAS

#elif defined (NBLAS)
/* -DNBLAS: no BLAS at all, vanilla C code is used instead */
#define USE_NO_BLAS

#elif defined (MATLAB_MEX_FILE)
/* MATLAB mexFunction: MATLAB BLAS, tested before CBLAS and vendor BLAS */
#define USE_MATLAB_BLAS

#elif defined (CBLAS)
/* -DCBLAS: the C-BLAS interface */
#define USE_C_BLAS

#elif defined (UMF_SOL2) && !defined (NSUNPERF)
/* Sun Solaris, unless -DNSUNPERF: Sun Performance Library BLAS */
#define USE_SUNPERF_BLAS

#elif defined (UMF_SGI) && !defined (NSCSL)
/* SGI IRIX, unless -DNSCSL: SGI SCSL Scientific Library BLAS */
#define USE_SCSL_BLAS

#else
/* everything else: Fortran BLAS */
#define USE_FORTRAN_BLAS
#endif
|
322 |
|
323 /* -------------------------------------------------------------------------- */ |
|
324 /* int vs. long integer arguments */ |
|
325 /* -------------------------------------------------------------------------- */ |
|
326 |
|
/*
    Determine if the BLAS exists for the long integer version.  It exists if
    LONGBLAS is defined in the Makefile, or if using the BLAS from the
    Sun Performance Library, or SGI's SCSL Scientific Library.
*/

#if (defined (USE_SUNPERF_BLAS) || defined (USE_SCSL_BLAS)) \
    && !defined (LONGBLAS)
#define LONGBLAS
#endif

/* do not use the BLAS if Int's are long and LONGBLAS is not defined */
/* (USE_NO_BLAS may already be defined, in which case it is left alone) */
#if defined (LONG_INTEGER) && !defined (LONGBLAS) && !defined (USE_NO_BLAS)
#define USE_NO_BLAS
#endif
|
343 |
|
344 |
|
345 /* -------------------------------------------------------------------------- */ |
|
346 /* Use (void *) arguments for the SGI */ |
|
347 /* -------------------------------------------------------------------------- */ |
|
348 |
|
#if defined (UMF_SGI)
/*
    Use (void *) pointers for complex types in SCSL.
    The ATLAS C-BLAS, and the SGI C-BLAS differ.  The former uses (void *)
    arguments, the latter uses SCSL_ZOMPLEX_T, which are either scsl_zomplex
    or (void *).  Using (void *) is simpler, and is selected by defining
    SCSL_VOID_ARGS, below.  The cc compiler doesn't complain, but gcc is
    more picky, and generates a warning without this next statement.
    With gcc and the 07/09/98 version of SGI's cblas.h, spurious warnings
    about complex BLAS arguments will be reported anyway.  This is because this
    older version of SGI's cblas.h does not make use of the SCSL_VOID_ARGS
    parameter, which is present in the 12/6/01 version of SGI's cblas.h.  You
    can safely ignore these warnings.

    The macro is defined only if it is not set already, so that a definition
    given on the compiler command line is not redefined here.
*/
#ifndef SCSL_VOID_ARGS
#define SCSL_VOID_ARGS
#endif
#endif
|
365 |
|
366 |
|
367 /* -------------------------------------------------------------------------- */ |
|
368 /* The BLAS exists, construct appropriate macros */ |
|
369 /* -------------------------------------------------------------------------- */ |
|
370 |
|
371 #if !defined (USE_NO_BLAS) /* { */ |
|
372 |
|
373 /* |
|
374 If the compile-time flag -DNBLAS is defined, then the BLAS are not used, |
|
375 portable vanilla C code is used instead, and the remainder of this file |
|
376 is ignored. |
|
377 |
|
378 Using the BLAS is much faster, but how C calls the Fortran BLAS is |
|
379 machine-dependent and thus can cause portability problems. Thus, use |
|
380 -DNBLAS to ensure portability (at the expense of speed). |
|
381 |
|
382 Preferences: |
|
383 |
|
384 *** The best interface to use, regardless of the option you select |
|
385 below, is the standard C-BLAS interface. Not all BLAS libraries |
|
386 use this interface. The only problem with this interface is that |
|
387 it does not extend to the LP64 model. The C-BLAS does not provide |
|
388 for a 64-bit integer. In addition, SGI's older cblas.h can cause |
|
389 spurious warnings when using the C-BLAS interface. |
|
390 |
|
391 1) often the most preferred (but see option (3)): use the |
|
392 optimized vendor-supplied library (such as the Sun Performance |
|
393 Library, or IBM's ESSL). This is often the fastest, but might not |
|
394 be portable and might not always be available. When compiling a |
|
395 MATLAB mexFunction it might be difficult to get the mex compiler |
|
396 script to recognize the vendor-supplied BLAS. Note that the |
|
397 freely-available BLAS (option 3) can be faster than the vendor- |
|
398 specific BLAS. You are encouraged to try both option (1) and (3). |
|
399 |
|
400 2) When compiling the UMFPACK mexFunction to use UMFPACK in MATLAB, use |
|
401 the BLAS provided by The Mathworks, Inc. This assumes you are using |
|
402 MATLAB V6 or higher, since the BLAS are not incorporated in V5 or |
|
403 earlier versions. On my Sun workstation, the MATLAB BLAS gave |
|
404 slightly worse performance than the Sun Perf. BLAS. The advantage |
|
405 of using the MATLAB BLAS is that it's available on any computer that |
|
406 has MATLAB V6 or higher. I have not tried using MATLAB BLAS outside |
|
407 of a mexFunction in a stand-alone C code, but MATLAB (V6) allows for |
|
408 this. This is well worth trying if you have MATLAB and don't want |
|
409 to bother installing the ATLAS BLAS (option 3b, below). The only |
|
410 glitch to this is that MATLAB does not provide a portable interface |
|
411 to the BLAS (an underscore is required for some but not all |
|
412 architectures). For Windows and MATLAB 6.0 or 6.1, you also need |
|
413 to copy the libmwlapack.dll file into your MATLAB installation |
|
414 directory; see the User Guide for details. |
|
415 |
|
416 In the current distribution, the only BLAS that the UMFPACK |
|
417 mexFunction will use is the internal MATLAB BLAS. It's possible to |
|
418 use other BLAS, but handling the porting of using the mex compiler |
|
419 with different BLAS libraries is not trivial. |
|
420 |
|
421 As of MATLAB 6.5, the BLAS used internally in MATLAB is the ATLAS |
|
422 BLAS. |
|
423 |
|
424 3) Use a freely-available high-performance BLAS library: |
|
425 |
|
426 (a) The BLAS by Kazushige Goto and Robert van de Geijn, at |
|
427 http://www.cs.utexas.edu/users/flame/goto. This BLAS increased |
|
428 the performance of UMFPACK by almost 50% as compared to the |
|
429 ATLAS BLAS (v3.2). |
|
430 |
|
431 (b) The ATLAS BLAS, available at http://www.netlib.org/atlas, |
|
432 by R. Clint Whaley, Antoine Petitet, and Jack Dongarra. |
|
433 This has a standard C interface, and thus the interface to it is |
|
434 fully portable. Its performance rivals, and sometimes exceeds, |
|
435 the vendor-supplied BLAS on many computers. |
|
436 |
|
437 (c) The Fortran RISC BLAS by Michel Dayde', Iain Duff, Antoine |
|
438 Petitet, and Abderrahim Qrichi Aniba, available via anonymous |
|
439 ftp to ftp.enseeiht.fr in the pub/numerique/BLAS/RISC directory, |
|
440 See M. J. Dayde' and I. S. Duff, "The RISC BLAS: A blocked |
|
441 implementation of level 3 BLAS for RISC processors," ACM Trans. |
|
442 Math. Software, vol. 25, no. 3., Sept. 1999. This will give |
|
443 you good performance, but with the same C-to-Fortran portability |
|
444 problems as option (1). |
|
445 |
|
446 4) Use UMFPACK's built-in vanilla C code by setting -DNBLAS at compile |
|
447 time. The key advantage is portability, which is guaranteed if you |
|
448 have an ANSI C compliant compiler. You also don't need to download |
|
449 any other package - UMFPACK is stand-alone. No Fortran is used |
|
450 anywhere in UMFPACK. UMFPACK will be much slower than when using |
|
451 options (1) through (3), however. |
|
452 |
|
453 5) least preferred: use the standard Fortran implementation of the |
|
454 BLAS, also available at Netlib (http://www.netlib.org/blas). This |
|
455 will be no faster than option (4), and not portable because of |
|
456 C-to-Fortran calling conventions. Don't bother trying option (5). |
|
457 |
|
458 The mechanics of how C calls the BLAS on various computers are as follows: |
|
459 |
|
460 * C-BLAS (from the ATLAS library, for example): |
|
461 The same interface is used on all computers. |
|
462 |
|
463 * Defaults for calling the Fortran BLAS: |
|
464 add underscore, pass scalars by reference, use string arguments. |
|
465 |
|
466 * The Fortran BLAS on Sun Solaris (when compiling the MATLAB mexFunction |
|
467 or when using the Fortran RISC BLAS), SGI IRIX, Linux, and Compaq |
|
468 Alpha: use defaults. |
|
469 |
|
470 * Sun Solaris (when using the C-callable Sun Performance library): |
|
471 no underscore, pass scalars by value, use character arguments. |
|
472 |
|
473 * The Fortran BLAS (ESSL Library) on the IBM RS 6000, and HP Unix: |
|
474 no underscore, pass scalars by reference, use string arguments. |
|
475 |
|
476 * The Fortran BLAS on Windows: |
|
477 no underscore, pass scalars by reference, use string arguments. |
|
478 If you compile the umfpack mexFunction using umfpack_make, and are |
|
479 using the lcc compiler bundled with MATLAB, then you must first |
|
480 copy the umfpack\lcc_lib\libmwlapack.lib file into the |
|
481 <matlab>\extern\lib\win32\lcc\ directory, where <matlab> is the |
|
482 directory in which MATLAB is installed. Next, type mex -setup |
|
483 at the MATLAB prompt, and ask MATLAB to select the lcc compiler. |
|
484 MATLAB has built-in BLAS, but it cannot be accessed by a program |
|
485 compiled by lcc without first copying this file. |
|
486 */ |
|
487 |
|
488 |
|
489 |
|
490 /* -------------------------------------------------------------------------- */ |
|
491 #ifdef USE_C_BLAS /* { */ |
|
492 /* -------------------------------------------------------------------------- */ |
|
493 |
|
494 |
|
495 /* -------------------------------------------------------------------------- */ |
|
496 /* use the C-BLAS (any computer) */ |
|
497 /* -------------------------------------------------------------------------- */ |
|
498 |
|
499 /* |
|
500 C-BLAS is the default interface, with the following exceptions. Solaris |
|
501 uses the Sun Performance BLAS for libumfpack.a (the C-callable library). |
|
502 SGI IRIX uses the SCSL BLAS for libumfpack.a. All architectures use |
|
503 MATLAB's internal BLAS for the mexFunction. These |
|
504 options are set in the Make.* files. The Make.generic file uses no BLAS |
|
505 at all. |
|
506 |
|
507 If you use the ATLAS C-BLAS, then be sure to set the -I flag to |
|
508 -I/path/ATLAS/include, where /path/ATLAS is the ATLAS installation |
|
509 directory. See Make.solaris for an example. You do not need to do this |
|
510 for the SGI, which has a /usr/include/cblas.h. |
|
511 */ |
|
512 |
|
513 #include "cblas.h" |
|
514 |
|
/* Map the generic BLAS_* routine names onto the C-BLAS entry points, and */
/* define how a floating-point scalar is declared and assigned.  With */
/* COMPLEX, a scalar is a double array of size 2 holding the real part (xr) */
/* and imaginary part (xi); otherwise it is a plain double and xi is unused. */
/* BLAS_ASSIGN expands to a brace-enclosed block, and its arguments are */
/* parenthesized so that any expression may be passed. */
#ifdef COMPLEX
#define BLAS_GEMM_ROUTINE cblas_zgemm
#define BLAS_TRSM_ROUTINE cblas_ztrsm
#define BLAS_TRSV_ROUTINE cblas_ztrsv
#define BLAS_GEMV_ROUTINE cblas_zgemv
#define BLAS_GER_ROUTINE cblas_zgeru
#define BLAS_SCAL_ROUTINE cblas_zscal
#define BLAS_COPY_ROUTINE cblas_zcopy
#define BLAS_DECLARE_SCALAR(x) double x [2]
#define BLAS_ASSIGN(x,xr,xi) { (x) [0] = (xr) ; (x) [1] = (xi) ; }
#else
#define BLAS_GEMM_ROUTINE cblas_dgemm
#define BLAS_TRSM_ROUTINE cblas_dtrsm
#define BLAS_TRSV_ROUTINE cblas_dtrsv
#define BLAS_GEMV_ROUTINE cblas_dgemv
#define BLAS_GER_ROUTINE cblas_dger
#define BLAS_SCAL_ROUTINE cblas_dscal
#define BLAS_COPY_ROUTINE cblas_dcopy
#define BLAS_DECLARE_SCALAR(x) double x
#define BLAS_ASSIGN(x,xr,xi) { (x) = (xr) ; }
#endif
|
536 |
|
/* Option arguments are the C-BLAS enumeration constants. */
#define BLAS_LOWER CblasLower
#define BLAS_UNIT_DIAGONAL CblasUnit
#define BLAS_RIGHT CblasRight
#define BLAS_NO_TRANSPOSE CblasNoTrans
#define BLAS_TRANSPOSE CblasTrans

/* The trailing comma is intentional: the macro supplies a complete leading */
/* argument ("CblasColMajor,") and expands to nothing for the other BLAS. */
#define BLAS_COLUMN_MAJOR_ORDER CblasColMajor,

/* Scalars, integer scalars and arrays are passed through unchanged. */
#define BLAS_SCALAR(x) (x)
#define BLAS_INT_SCALAR(n) (n)
#define BLAS_ARRAY(a) (a)
|
546 |
|
547 |
|
548 |
|
549 /* -------------------------------------------------------------------------- */ |
|
550 #else /* } USE_C_BLAS { */ |
|
551 /* -------------------------------------------------------------------------- */ |
|
552 |
|
553 /* -------------------------------------------------------------------------- */ |
|
554 /* use Fortran (or other architecture-specific) BLAS */ |
|
555 /* -------------------------------------------------------------------------- */ |
|
556 |
|
/* No such argument when not using the C-BLAS: the macro expands to nothing, */
/* whereas the C-BLAS variant expands to "CblasColMajor," (with the comma). */
#define BLAS_COLUMN_MAJOR_ORDER
|
559 |
|
560 /* Determine which architecture we're on and set options accordingly. */ |
|
561 /* The default, if nothing is defined is to add an underscore, */ |
|
562 /* pass scalars by reference, and use string arguments. */ |
|
563 |
|
564 /* ---------------------------------- */ |
|
565 /* Sun Performance BLAS */ |
|
566 /* ---------------------------------- */ |
|
567 |
|
#ifdef USE_SUNPERF_BLAS
#ifdef _SUNPERF_H
/* <sunperf.h> has been included somehow anyway, outside of umf_config.h */
#error "sunperf.h must NOT be #include'd. See umf_config.h for details."
#endif
/* Sun Performance Library calling convention: scalars by value, no */
/* underscore appended to the routine names, and single-character option */
/* arguments.  These three flags may also be set by the user, so each is */
/* defined only if it is not set already. */
#ifndef BLAS_BY_VALUE
#define BLAS_BY_VALUE
#endif
#ifndef BLAS_NO_UNDERSCORE
#define BLAS_NO_UNDERSCORE
#endif
#ifndef BLAS_CHAR_ARG
#define BLAS_CHAR_ARG
#endif
#endif /* USE_SUNPERF_BLAS */
|
577 |
|
578 /* ---------------------------------- */ |
|
579 /* SGI SCSL BLAS */ |
|
580 /* ---------------------------------- */ |
|
581 |
|
#ifdef USE_SCSL_BLAS
/* The LP64 build includes the scsl_blas_i8 header; otherwise the regular */
/* SCSL BLAS header is included. */
#if defined (LP64)
#include <scsl_blas_i8.h>
#else
#include <scsl_blas.h>
#endif
/* SCSL calling convention: scalars by value, no underscore appended.  Both */
/* flags may also be set by the user, so each is defined only if it is not */
/* set already. */
#ifndef BLAS_BY_VALUE
#define BLAS_BY_VALUE
#endif
#ifndef BLAS_NO_UNDERSCORE
#define BLAS_NO_UNDERSCORE
#endif
#endif /* USE_SCSL_BLAS */
|
591 |
|
592 /* ---------------------------------- */ |
|
593 /* IBM AIX, Windows, and HP Fortran BLAS */ |
|
594 /* ---------------------------------- */ |
|
595 |
|
/* On these platforms no underscore is appended to the Fortran BLAS names. */
/* BLAS_NO_UNDERSCORE may already be set (by the user, or by one of the */
/* vendor BLAS sections), in which case it is left alone. */
#if !defined (BLAS_NO_UNDERSCORE) \
    && (defined (UMF_AIX) || defined (UMF_WINDOWS) || defined (UMF_HP))
#define BLAS_NO_UNDERSCORE
#endif
|
599 |
|
600 |
|
601 /* -------------------------------------------------------------------------- */ |
|
602 /* BLAS names */ |
|
603 /* -------------------------------------------------------------------------- */ |
|
604 |
|
/* Select the external names of the seven BLAS routines that are used. */
/* Three naming schemes exist: the 64-bit Sun Performance names (suffix */
/* _64), plain names (BLAS_NO_UNDERSCORE), and the default, which appends */
/* an underscore.  Within each scheme, COMPLEX selects the z* routines */
/* (with zgeru as the rank-1 update) and otherwise the d* routines. */

#if defined (LP64) && defined (USE_SUNPERF_BLAS) && defined (LONG_INTEGER)

/* 64-bit sunperf BLAS, for Sun Solaris only */
#ifdef COMPLEX
#define BLAS_GEMM_ROUTINE zgemm_64
#define BLAS_TRSM_ROUTINE ztrsm_64
#define BLAS_TRSV_ROUTINE ztrsv_64
#define BLAS_GEMV_ROUTINE zgemv_64
#define BLAS_GER_ROUTINE zgeru_64
#define BLAS_SCAL_ROUTINE zscal_64
#define BLAS_COPY_ROUTINE zcopy_64
#else
#define BLAS_GEMM_ROUTINE dgemm_64
#define BLAS_TRSM_ROUTINE dtrsm_64
#define BLAS_TRSV_ROUTINE dtrsv_64
#define BLAS_GEMV_ROUTINE dgemv_64
#define BLAS_GER_ROUTINE dger_64
#define BLAS_SCAL_ROUTINE dscal_64
#define BLAS_COPY_ROUTINE dcopy_64
#endif /* COMPLEX */

#elif defined (BLAS_NO_UNDERSCORE)

/* naming convention: no underscore */
#ifdef COMPLEX
#define BLAS_GEMM_ROUTINE zgemm
#define BLAS_TRSM_ROUTINE ztrsm
#define BLAS_TRSV_ROUTINE ztrsv
#define BLAS_GEMV_ROUTINE zgemv
#define BLAS_GER_ROUTINE zgeru
#define BLAS_SCAL_ROUTINE zscal
#define BLAS_COPY_ROUTINE zcopy
#else
#define BLAS_GEMM_ROUTINE dgemm
#define BLAS_TRSM_ROUTINE dtrsm
#define BLAS_TRSV_ROUTINE dtrsv
#define BLAS_GEMV_ROUTINE dgemv
#define BLAS_GER_ROUTINE dger
#define BLAS_SCAL_ROUTINE dscal
#define BLAS_COPY_ROUTINE dcopy
#endif /* COMPLEX */

#else

/* default: add underscore */
#ifdef COMPLEX
#define BLAS_GEMM_ROUTINE zgemm_
#define BLAS_TRSM_ROUTINE ztrsm_
#define BLAS_TRSV_ROUTINE ztrsv_
#define BLAS_GEMV_ROUTINE zgemv_
#define BLAS_GER_ROUTINE zgeru_
#define BLAS_SCAL_ROUTINE zscal_
#define BLAS_COPY_ROUTINE zcopy_
#else
#define BLAS_GEMM_ROUTINE dgemm_
#define BLAS_TRSM_ROUTINE dtrsm_
#define BLAS_TRSV_ROUTINE dtrsv_
#define BLAS_GEMV_ROUTINE dgemv_
#define BLAS_GER_ROUTINE dger_
#define BLAS_SCAL_ROUTINE dscal_
#define BLAS_COPY_ROUTINE dcopy_
#endif /* COMPLEX */

#endif /* LP64 && USE_SUNPERF_BLAS && LONG_INTEGER */
|
675 |
|
676 |
|
677 /* -------------------------------------------------------------------------- */ |
|
678 /* BLAS real or complex floating-point scalars */ |
|
679 /* -------------------------------------------------------------------------- */ |
|
680 |
|
#ifdef COMPLEX

/*
    The SunPerf BLAS expects to see a doublecomplex scalar, but it
    also will accept an array of size 2.  See the manual, normally at
    file:///opt/SUNWspro/WS6U1/lib/locale/C/html/manuals/perflib/user_guide
    /plug_using_perflib.html .  This manual is inconsistent with the man pages
    for zgemm, zgemv, and zgeru and also inconsistent with the <sunperf.h>
    include file.  Use this instead, for SunPerf (only works if you do NOT
    include sunperf.h).  Fortunately, this file (umf_config.h) is not included
    in any user code that calls UMFPACK.  Thus, the caller may include
    sunperf.h in his or her own code, and that is safely ignored here.
    SGI's SCSL BLAS has yet a different kind of struct, but we can use a
    double array of size 2 instead (since SCSL_VOID_ARGS is defined).
    Most BLAS expect complex scalars as pointers to double arrays of size 2.
*/

/* A complex scalar is a double array of size 2: [0] is the real part (xr) */
/* and [1] the imaginary part (xi).  The array itself is what gets passed. */
#define BLAS_DECLARE_SCALAR(x) double x [2]
#define BLAS_ASSIGN(x,xr,xi) { (x) [0] = (xr) ; (x) [1] = (xi) ; }
#define BLAS_SCALAR(x) (x)

#else

/* A real scalar is a plain double; xi is ignored.  It is passed by value */
/* if BLAS_BY_VALUE is defined, and by reference (its address) otherwise. */
#define BLAS_DECLARE_SCALAR(x) double x
#define BLAS_ASSIGN(x,xr,xi) { (x) = (xr) ; }
#ifdef BLAS_BY_VALUE
#define BLAS_SCALAR(x) (x)
#else
#define BLAS_SCALAR(x) &(x)
#endif

#endif /* COMPLEX */
|
713 |
|
714 |
|
/* -------------------------------------------------------------------------- */
/* BLAS integer scalars */
/* -------------------------------------------------------------------------- */

/*
    Fortran requires integers to be passed by reference.
    The SCSL BLAS requires long long arguments in LP64 mode.

    BLAS_INT_SCALAR (n) passes the integer n to a BLAS routine.  In the
    by-reference case n must be an lvalue, since its address is taken.  In the
    two by-value cases the argument is parenthesized so that an expression
    such as (a + b) is cast or passed as a whole.
*/

#if defined (USE_SCSL_BLAS) && defined (LP64)
#define BLAS_INT_SCALAR(n) ((long long) (n))
#elif defined (BLAS_BY_VALUE)
#define BLAS_INT_SCALAR(n) (n)
#else
#define BLAS_INT_SCALAR(n) &(n)
#endif
|
733 |
|
734 |
|
/* -------------------------------------------------------------------------- */
/* BLAS strings */
/* -------------------------------------------------------------------------- */

/*
    Option arguments (transpose, side, triangle, diagonal) for the BLAS.
    The Sun Performance BLAS wants a character instead of a string, so each
    option is a character constant if BLAS_CHAR_ARG is defined, and a
    one-character string literal otherwise.
*/

#ifdef BLAS_CHAR_ARG
#define BLAS_NO_TRANSPOSE 'N'
#define BLAS_TRANSPOSE 'T'
#define BLAS_LEFT 'L'
#define BLAS_RIGHT 'R'
#define BLAS_LOWER 'L'
#define BLAS_UNIT_DIAGONAL 'U'
#else
#define BLAS_NO_TRANSPOSE "N"
#define BLAS_TRANSPOSE "T"
#define BLAS_LEFT "L"
#define BLAS_RIGHT "R"
#define BLAS_LOWER "L"
#define BLAS_UNIT_DIAGONAL "U"
#endif
|
758 |
|
759 |
|
/* -------------------------------------------------------------------------- */
/* BLAS arrays */
/* -------------------------------------------------------------------------- */

/*
    The complex SunPerf BLAS expects to see a doublecomplex array of size s.
    This is broken (see above, regarding complex scalars in sunperf.h).
    For SunPerf BLAS, just pass a pointer to the array, and ignore sunperf.h.
    With sunperf.h, you would need:

	#define BLAS_ARRAY(a) ((doublecomplex *)(a))

    SGI's SCSL BLAS has yet a different kind of struct, but we can use a
    double array of size 2 instead (since SCSL_VOID_ARGS is defined).

    The real versions all use just a (double *) pointer.

    In all cases, no typecast is required.  This will break if <sunperf.h> is
    included.

    If you have read this far, I hope you see now why (void *) is a much
    better choice for complex BLAS prototypes, and why double x [2] is better
    than an architecture dependent struct { double real ; double imag ; }
    type definition.

*/

/* pass the array pointer a through to the BLAS unchanged */
#define BLAS_ARRAY(a) (a)
|
788 |
|
789 |
|
790 /* -------------------------------------------------------------------------- */ |
|
791 #endif /* USE_C_BLAS } */ |
|
792 /* -------------------------------------------------------------------------- */ |
|
793 |
|
794 |
|
795 |
|
796 |
|
797 |
|
/* -------------------------------------------------------------------------- */
/* BLAS macros, for all interfaces */
/* -------------------------------------------------------------------------- */

/*
    All architecture-dependent issues have now been taken into consideration,
    and folded into the macros BLAS_DECLARE_SCALAR, BLAS_ASSIGN, BLAS_*_ROUTINE,
    BLAS_COLUMN_MAJOR_ORDER, BLAS_NO_TRANSPOSE, BLAS_TRANSPOSE, BLAS_SCALAR,
    BLAS_INT_SCALAR, BLAS_ARRAY, and Int.

    You will note that there is not a *** single *** name, declaration, or
    argument to the BLAS which is not somehow different in one or more versions
    of the BLAS!
*/
|
812 |
|
813 |
|
/* C = C - A*B', where:
 *	A is m-by-k with leading dimension ldac
 *	B is n-by-k with leading dimension ldb (B is passed with
 *	    BLAS_TRANSPOSE, so it is B' that is k-by-n)
 *	C is m-by-n with leading dimension ldac
 *
 * A and C share the leading dimension ldac, which appears twice in the
 * expansion.  The macro expands to a brace-enclosed block; invoke it as a
 * statement.  m, n, k, ldb, and ldac must be lvalues whenever
 * BLAS_INT_SCALAR takes the address of its argument.  The local scalars use
 * a blas_ prefix so that they cannot capture a caller's argument. */
#define BLAS_GEMM(m,n,k,A,B,ldb,C,ldac) \
{ \
    BLAS_DECLARE_SCALAR (blas_alpha) ; \
    BLAS_DECLARE_SCALAR (blas_beta) ; \
    BLAS_ASSIGN (blas_alpha, -1.0, 0.0) ; \
    BLAS_ASSIGN (blas_beta, 1.0, 0.0) ; \
    (void) BLAS_GEMM_ROUTINE (BLAS_COLUMN_MAJOR_ORDER \
	BLAS_NO_TRANSPOSE, BLAS_TRANSPOSE, \
	BLAS_INT_SCALAR (m), BLAS_INT_SCALAR (n), BLAS_INT_SCALAR (k), \
	BLAS_SCALAR (blas_alpha), \
	BLAS_ARRAY (A), BLAS_INT_SCALAR (ldac), \
	BLAS_ARRAY (B), BLAS_INT_SCALAR (ldb), BLAS_SCALAR (blas_beta), \
	BLAS_ARRAY (C), BLAS_INT_SCALAR (ldac)) ; \
}
|
832 |
|
/* A = A - x*y', where:
 *	A is m-by-n with leading dimension d
 *	x is a column vector with stride 1
 *	y is a column vector with stride 1
 *
 * The macro expands to a brace-enclosed block; invoke it as a statement.
 * m, n, and d must be lvalues whenever BLAS_INT_SCALAR takes the address of
 * its argument.  The locals use a blas_ prefix so that a caller's variable
 * named "one" or "alpha" is not captured by the expansion. */
#define BLAS_GER(m,n,x,y,A,d) \
{ \
    Int blas_one = 1 ; \
    BLAS_DECLARE_SCALAR (blas_alpha) ; \
    BLAS_ASSIGN (blas_alpha, -1.0, 0.0) ; \
    (void) BLAS_GER_ROUTINE (BLAS_COLUMN_MAJOR_ORDER \
	BLAS_INT_SCALAR (m), BLAS_INT_SCALAR (n), \
	BLAS_SCALAR (blas_alpha), \
	BLAS_ARRAY (x), BLAS_INT_SCALAR (blas_one), \
	BLAS_ARRAY (y), BLAS_INT_SCALAR (blas_one), \
	BLAS_ARRAY (A), BLAS_INT_SCALAR (d)) ; \
}
|
849 |
|
/* y = y - A*x, where:
 *	A is m-by-n with leading dimension d
 *	x is a column vector with stride 1
 *	y is a column vector with stride 1
 *
 * The macro expands to a brace-enclosed block; invoke it as a statement.
 * m, n, and d must be lvalues whenever BLAS_INT_SCALAR takes the address of
 * its argument.  The locals use a blas_ prefix so that a caller's variable
 * named "one", "alpha", or "beta" is not captured by the expansion. */
#define BLAS_GEMV(m,n,A,x,y,d) \
{ \
    Int blas_one = 1 ; \
    BLAS_DECLARE_SCALAR (blas_alpha) ; \
    BLAS_DECLARE_SCALAR (blas_beta) ; \
    BLAS_ASSIGN (blas_alpha, -1.0, 0.0) ; \
    BLAS_ASSIGN (blas_beta, 1.0, 0.0) ; \
    (void) BLAS_GEMV_ROUTINE (BLAS_COLUMN_MAJOR_ORDER \
	BLAS_NO_TRANSPOSE, \
	BLAS_INT_SCALAR (m), BLAS_INT_SCALAR (n), \
	BLAS_SCALAR (blas_alpha), \
	BLAS_ARRAY (A), BLAS_INT_SCALAR (d), \
	BLAS_ARRAY (x), BLAS_INT_SCALAR (blas_one), BLAS_SCALAR (blas_beta), \
	BLAS_ARRAY (y), BLAS_INT_SCALAR (blas_one)) ; \
}
|
868 |
|
869 |
|
/* solve Lx=b, where:
 *	b is a column vector of length m with stride 1, passed to the BLAS
 *	    as both the right-hand side and the solution vector
 *	L is held in the m-by-m array A, with leading dimension d; the BLAS
 *	    is told it is lower triangular (BLAS_LOWER) with a unit diagonal
 *	    (BLAS_UNIT_DIAGONAL) and is not transposed
 *
 * The macro expands to a brace-enclosed block; invoke it as a statement.
 * m and d must be lvalues whenever BLAS_INT_SCALAR takes the address of its
 * argument.  The local uses a blas_ prefix so that a caller's variable named
 * "one" is not captured by the expansion. */
#define BLAS_TRSV(m,A,b,d) \
{ \
    Int blas_one = 1 ; \
    (void) BLAS_TRSV_ROUTINE (BLAS_COLUMN_MAJOR_ORDER \
	BLAS_LOWER, BLAS_NO_TRANSPOSE, BLAS_UNIT_DIAGONAL, \
	BLAS_INT_SCALAR (m), \
	BLAS_ARRAY (A), BLAS_INT_SCALAR (d), \
	BLAS_ARRAY (b), BLAS_INT_SCALAR (blas_one)) ; \
}
|
882 |
|
/* solve XL'=B, where:
 *	B is m-by-n with leading dimension ldb, passed to the BLAS as both
 *	    the right-hand side and the solution X
 *	L is held in the n-by-n array A, with leading dimension lda; the BLAS
 *	    is told it is lower triangular (BLAS_LOWER) with a unit diagonal
 *	    (BLAS_UNIT_DIAGONAL), applied transposed from the right
 *
 * The macro expands to a brace-enclosed block; invoke it as a statement.
 * m, n, lda, and ldb must be lvalues whenever BLAS_INT_SCALAR takes the
 * address of its argument.  The local scalar uses a blas_ prefix so that a
 * caller's variable named "alpha" is not captured by the expansion. */
#define BLAS_TRSM_RIGHT(m,n,A,lda,B,ldb) \
{ \
    BLAS_DECLARE_SCALAR (blas_alpha) ; \
    BLAS_ASSIGN (blas_alpha, 1.0, 0.0) ; \
    (void) BLAS_TRSM_ROUTINE (BLAS_COLUMN_MAJOR_ORDER \
	BLAS_RIGHT, BLAS_LOWER, BLAS_TRANSPOSE, BLAS_UNIT_DIAGONAL, \
	BLAS_INT_SCALAR (m), BLAS_INT_SCALAR (n), \
	BLAS_SCALAR (blas_alpha), \
	BLAS_ARRAY (A), BLAS_INT_SCALAR (lda), \
	BLAS_ARRAY (B), BLAS_INT_SCALAR (ldb)) ; \
}
|
897 |
|
/* x = s*x, where x is a stride-1 vector of length n.
 *
 * The scale factor is built from REAL_COMPONENT (s) and IMAG_COMPONENT (s).
 * The macro expands to a brace-enclosed block; invoke it as a statement.
 * n must be an lvalue whenever BLAS_INT_SCALAR takes the address of its
 * argument.  The locals use a blas_ prefix so that a caller's variable named
 * "one" or "alpha" is not captured by the expansion (with a local named
 * alpha, BLAS_SCAL (n, alpha, x) would read that local before it was set). */
#define BLAS_SCAL(n,s,x) \
{ \
    Int blas_one = 1 ; \
    BLAS_DECLARE_SCALAR (blas_alpha) ; \
    BLAS_ASSIGN (blas_alpha, REAL_COMPONENT (s), IMAG_COMPONENT (s)) ; \
    (void) BLAS_SCAL_ROUTINE ( \
	BLAS_INT_SCALAR (n), BLAS_SCALAR (blas_alpha), \
	BLAS_ARRAY (x), BLAS_INT_SCALAR (blas_one)) ; \
}
|
908 |
|
/* y = x, where x and y are stride-1 vectors of length n.
 *
 * x is passed to the BLAS copy routine first and y second; the BLAS copy
 * routines take the source vector first and the destination second, so x is
 * copied into y.
 *
 * The macro expands to a brace-enclosed block; invoke it as a statement.
 * n must be an lvalue whenever BLAS_INT_SCALAR takes the address of its
 * argument.  The local uses a blas_ prefix so that a caller's variable named
 * "one" is not captured by the expansion. */
#define BLAS_COPY(n,x,y) \
{ \
    Int blas_one = 1 ; \
    (void) BLAS_COPY_ROUTINE ( \
	BLAS_INT_SCALAR (n), \
	BLAS_ARRAY (x), BLAS_INT_SCALAR (blas_one), \
	BLAS_ARRAY (y), BLAS_INT_SCALAR (blas_one)) ; \
}
|
918 |
|
919 #endif /* !defined (USE_NO_BLAS) } */ |