comparison main/sparse/SuperLU/SRC/dgssvx.c @ 0:6b33357c7561 octave-forge

Initial revision
author pkienzle
date Wed, 10 Oct 2001 19:54:49 +0000
parents
children 7dad48fc229c
comparison
equal deleted inserted replaced
-1:000000000000 0:6b33357c7561
1
2
3 /*
4 * -- SuperLU routine (version 2.0) --
5 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
6 * and Lawrence Berkeley National Lab.
7 * November 15, 1997
8 *
9 */
10 #include "dsp_defs.h"
11 #include "util.h"
12
13 void
14 dgssvx(char *fact, char *trans, char *refact,
15 SuperMatrix *A, factor_param_t *factor_params, int *perm_c,
16 int *perm_r, int *etree, char *equed, double *R, double *C,
17 SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
18 SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth,
19 double *rcond, double *ferr, double *berr,
20 mem_usage_t *mem_usage, int *info )
21 {
22 /*
23 * Purpose
24 * =======
25 *
26 * DGSSVX solves the system of linear equations A*X=B or A'*X=B, using
27 * the LU factorization from dgstrf(). Error bounds on the solution and
28 * a condition estimate are also provided. It performs the following steps:
29 *
30 * 1. If A is stored column-wise (A->Stype = NC):
31 *
32 * 1.1. If fact = 'E', scaling factors are computed to equilibrate the
33 * system:
34 * trans = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
35 * trans = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
36 * trans = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
37 * Whether or not the system will be equilibrated depends on the
38 * scaling of the matrix A, but if equilibration is used, A is
39 * overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if trans='N')
40 * or diag(C)*B (if trans = 'T' or 'C').
41 *
42 * 1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
43 * matrix that usually preserves sparsity.
44 * For more details of this step, see sp_preorder.c.
45 *
46 * 1.3. If fact = 'N' or 'E', the LU decomposition is used to factor the
47 * matrix A (after equilibration if fact = 'E') as Pr*A*Pc = L*U,
48 * with Pr determined by partial pivoting.
49 *
50 * 1.4. Compute the reciprocal pivot growth factor.
51 *
52 * 1.5. If some U(i,i) = 0, so that U is exactly singular, then the
53 * routine returns with info = i. Otherwise, the factored form of
54 * A is used to estimate the condition number of the matrix A. If
55 * the reciprocal of the condition number is less than machine
56 * precision, info = A->ncol+1 is returned as a warning, but the
57 * routine still goes on to solve for X and computes error bounds
58 * as described below.
59 *
60 * 1.6. The system of equations is solved for X using the factored form
61 * of A.
62 *
63 * 1.7. Iterative refinement is applied to improve the computed solution
64 * matrix and calculate error bounds and backward error estimates
65 * for it.
66 *
67 * 1.8. If equilibration was used, the matrix X is premultiplied by
68 * diag(C) (if trans = 'N') or diag(R) (if trans = 'T' or 'C') so
69 * that it solves the original system before equilibration.
70 *
71 * 2. If A is stored row-wise (A->Stype = NR), apply the above algorithm
72 * to the transpose of A:
73 *
74 * 2.1. If fact = 'E', scaling factors are computed to equilibrate the
75 * system:
76 * trans = 'N': diag(R)*A'*diag(C) *inv(diag(C))*X = diag(R)*B
77 * trans = 'T': (diag(R)*A'*diag(C))**T *inv(diag(R))*X = diag(C)*B
78 * trans = 'C': (diag(R)*A'*diag(C))**H *inv(diag(R))*X = diag(C)*B
79 * Whether or not the system will be equilibrated depends on the
80 * scaling of the matrix A, but if equilibration is used, A' is
81 * overwritten by diag(R)*A'*diag(C) and B by diag(R)*B
82 * (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
83 *
84 * 2.2. Permute columns of transpose(A) (rows of A),
85 * forming transpose(A)*Pc, where Pc is a permutation matrix that
86 * usually preserves sparsity.
87 * For more details of this step, see sp_preorder.c.
88 *
89 * 2.3. If fact = 'N' or 'E', the LU decomposition is used to factor the
90 * transpose(A) (after equilibration if fact = 'E') as
91 * Pr*transpose(A)*Pc = L*U with the permutation Pr determined by
92 * partial pivoting.
93 *
94 * 2.4. Compute the reciprocal pivot growth factor.
95 *
96 * 2.5. If some U(i,i) = 0, so that U is exactly singular, then the
97 * routine returns with info = i. Otherwise, the factored form
98 * of transpose(A) is used to estimate the condition number of the
99 * matrix A. If the reciprocal of the condition number
100 * is less than machine precision, info = A->nrow+1 is returned as
101 * a warning, but the routine still goes on to solve for X and
102 * computes error bounds as described below.
103 *
104 * 2.6. The system of equations is solved for X using the factored form
105 * of transpose(A).
106 *
107 * 2.7. Iterative refinement is applied to improve the computed solution
108 * matrix and calculate error bounds and backward error estimates
109 * for it.
110 *
111 * 2.8. If equilibration was used, the matrix X is premultiplied by
112 * diag(C) (if trans = 'N') or diag(R) (if trans = 'T' or 'C') so
113 * that it solves the original system before equilibration.
114 *
115 * See supermatrix.h for the definition of 'SuperMatrix' structure.
116 *
117 * Arguments
118 * =========
119 *
120 * fact (input) char*
121 * Specifies whether or not the factored form of the matrix
122 * A is supplied on entry, and if not, whether the matrix A should
123 * be equilibrated before it is factored.
124 * = 'F': On entry, L, U, perm_r and perm_c contain the factored
125 * form of A. If equed is not 'N', the matrix A has been
126 * equilibrated with scaling factors R and C.
127 * A, L, U, perm_r are not modified.
128 * = 'N': The matrix A will be factored, and the factors will be
129 * stored in L and U.
130 * = 'E': The matrix A will be equilibrated if necessary, then
131 * factored into L and U.
132 *
133 * trans (input) char*
134 * Specifies the form of the system of equations:
135 * = 'N': A * X = B (No transpose)
136 * = 'T': A**T * X = B (Transpose)
137 * = 'C': A**H * X = B (Transpose)
138 *
139 * refact (input) char*
140 * Specifies whether we want to re-factor the matrix.
141 * = 'N': Factor the matrix A.
142 * = 'Y': Matrix A was factored before, now we want to re-factor
143 * matrix A with perm_r and etree as inputs. Use
144 * the same storage for the L\U factors previously allocated,
145 * expand it if necessary. User should insure to use the same
146 * memory model.
147 * If fact = 'F', then refact is not accessed.
148 *
149 * A (input/output) SuperMatrix*
150 * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
151 * of the linear equations is A->nrow. Currently, the type of A can be:
152 * Stype = NC or NR, Dtype = _D, Mtype = GE. In the future,
153 * more general A can be handled.
154 *
155 * On entry, If fact = 'F' and equed is not 'N', then A must have
156 * been equilibrated by the scaling factors in R and/or C.
157 * A is not modified if fact = 'F' or 'N', or if fact = 'E' and
158 * equed = 'N' on exit.
159 *
160 * On exit, if fact = 'E' and equed is not 'N', A is scaled as follows:
161 * If A->Stype = NC:
162 * equed = 'R': A := diag(R) * A
163 * equed = 'C': A := A * diag(C)
164 * equed = 'B': A := diag(R) * A * diag(C).
165 * If A->Stype = NR:
166 * equed = 'R': transpose(A) := diag(R) * transpose(A)
167 * equed = 'C': transpose(A) := transpose(A) * diag(C)
168 * equed = 'B': transpose(A) := diag(R) * transpose(A) * diag(C).
169 *
170 * factor_params (input) factor_param_t*
171 * The structure defines the input scalar parameters, consisting of
172 * the following fields. If factor_params = NULL, the default
173 * values are used for all the fields; otherwise, the values
174 * are given by the user.
175 * - panel_size (int): Panel size. A panel consists of at most
176 * panel_size consecutive columns. If panel_size = -1, use
177 * default value 8.
178 * - relax (int): To control degree of relaxing supernodes. If the
179 * number of nodes (columns) in a subtree of the elimination
180 * tree is less than relax, this subtree is considered as one
181 * supernode, regardless of the row structures of those columns.
182 * If relax = -1, use default value 8.
183 * - diag_pivot_thresh (double): Diagonal pivoting threshold.
184 * At step j of the Gaussian elimination, if
185 * abs(A_jj) >= diag_pivot_thresh * (max_(i>=j) abs(A_ij)),
186 * then use A_jj as pivot. 0 <= diag_pivot_thresh <= 1.
187 * If diag_pivot_thresh = -1, use default value 1.0,
188 * which corresponds to standard partial pivoting.
189 * - drop_tol (double): Drop tolerance threshold. (NOT IMPLEMENTED)
190 * At step j of the Gaussian elimination, if
191 * abs(A_ij)/(max_i abs(A_ij)) < drop_tol,
192 * then drop entry A_ij. 0 <= drop_tol <= 1.
193 * If drop_tol = -1, use default value 0.0, which corresponds to
194 * standard Gaussian elimination.
195 *
196 * perm_c (input/output) int*
197 * If A->Stype = NC, Column permutation vector of size A->ncol,
198 * which defines the permutation matrix Pc; perm_c[i] = j means
199 * column i of A is in position j in A*Pc.
200 * On exit, perm_c may be overwritten by the product of the input
201 * perm_c and a permutation that postorders the elimination tree
202 * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
203 * is already in postorder.
204 *
205 * If A->Stype = NR, column permutation vector of size A->nrow,
206 * which describes permutation of columns of transpose(A)
207 * (rows of A) as described above.
208 *
209 * perm_r (input/output) int*
210 * If A->Stype = NC, row permutation vector of size A->nrow,
211 * which defines the permutation matrix Pr, and is determined
212 * by partial pivoting. perm_r[i] = j means row i of A is in
213 * position j in Pr*A.
214 *
215 * If A->Stype = NR, permutation vector of size A->ncol, which
216 * determines permutation of rows of transpose(A)
217 * (columns of A) as described above.
218 *
219 * If refact is not 'Y', perm_r is output argument;
220 * If refact = 'Y', the pivoting routine will try to use the input
221 * perm_r, unless a certain threshold criterion is violated.
222 * In that case, perm_r is overwritten by a new permutation
223 * determined by partial pivoting or diagonal threshold pivoting.
224 *
225 * etree (input/output) int*, dimension (A->ncol)
226 * Elimination tree of Pc'*A'*A*Pc.
227 * If fact is not 'F' and refact = 'Y', etree is an input argument,
228 * otherwise it is an output argument.
229 * Note: etree is a vector of parent pointers for a forest whose
230 * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
231 *
232 * equed (input/output) char*
233 * Specifies the form of equilibration that was done.
234 * = 'N': No equilibration.
235 * = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
236 * = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
237 * = 'B': Both row and column equilibration, i.e., A was replaced
238 * by diag(R)*A*diag(C).
239 * If fact = 'F', equed is an input argument, otherwise it is
240 * an output argument.
241 *
242 * R (input/output) double*, dimension (A->nrow)
243 * The row scale factors for A or transpose(A).
244 * If equed = 'R' or 'B', A (if A->Stype = NC) or transpose(A) (if
245 * A->Stype = NR) is multiplied on the left by diag(R).
246 * If equed = 'N' or 'C', R is not accessed.
247 * If fact = 'F', R is an input argument; otherwise, R is output.
248 * If fact = 'F' and equed = 'R' or 'B', each element of R must
249 * be positive.
250 *
251 * C (input/output) double*, dimension (A->ncol)
252 * The column scale factors for A or transpose(A).
253 * If equed = 'C' or 'B', A (if A->Stype = NC) or transpose(A) (if
254 * A->Stype = NR) is multiplied on the right by diag(C).
255 * If equed = 'N' or 'R', C is not accessed.
256 * If fact = 'F', C is an input argument; otherwise, C is output.
257 * If fact = 'F' and equed = 'C' or 'B', each element of C must
258 * be positive.
259 *
260 * L (output) SuperMatrix*
261 * The factor L from the factorization
262 * Pr*A*Pc=L*U (if A->Stype = NC) or
263 * Pr*transpose(A)*Pc=L*U (if A->Stype = NR).
264 * Uses compressed row subscripts storage for supernodes, i.e.,
265 * L has types: Stype = SC, Dtype = _D, Mtype = TRLU.
266 *
267 * U (output) SuperMatrix*
268 * The factor U from the factorization
269 * Pr*A*Pc=L*U (if A->Stype = NC) or
270 * Pr*transpose(A)*Pc=L*U (if A->Stype = NR).
271 * Uses column-wise storage scheme, i.e., U has types:
272 * Stype = NC, Dtype = _D, Mtype = TRU.
273 *
274 * work (workspace/output) void*, size (lwork) (in bytes)
275 * User supplied workspace, should be large enough
276 * to hold data structures for factors L and U.
277 * On exit, if fact is not 'F', L and U point to this array.
278 *
279 * lwork (input) int
280 * Specifies the size of work array in bytes.
281 * = 0: allocate space internally by system malloc;
282 * > 0: use user-supplied work array of length lwork in bytes,
283 * returns error if space runs out.
284 * = -1: the routine guesses the amount of space needed without
285 * performing the factorization, and returns it in
286 * mem_usage->total_needed; no other side effects.
287 *
288 * See argument 'mem_usage' for memory usage statistics.
289 *
290 * B (input/output) SuperMatrix*
291 * B has types: Stype = DN, Dtype = _D, Mtype = GE.
292 * On entry, the right hand side matrix.
293 * On exit,
294 * if equed = 'N', B is not modified; otherwise
295 * if A->Stype = NC:
296 * if trans = 'N' and equed = 'R' or 'B', B is overwritten by
297 * diag(R)*B;
298 * if trans = 'T' or 'C' and equed = 'C' of 'B', B is
299 * overwritten by diag(C)*B;
300 * if A->Stype = NR:
301 * if trans = 'N' and equed = 'C' or 'B', B is overwritten by
302 * diag(C)*B;
303 * if trans = 'T' or 'C' and equed = 'R' of 'B', B is
304 * overwritten by diag(R)*B.
305 *
306 * X (output) SuperMatrix*
307 * X has types: Stype = DN, Dtype = _D, Mtype = GE.
308 * If info = 0 or info = A->ncol+1, X contains the solution matrix
309 * to the original system of equations. Note that A and B are modified
310 * on exit if equed is not 'N', and the solution to the equilibrated
311 * system is inv(diag(C))*X if trans = 'N' and equed = 'C' or 'B',
312 * or inv(diag(R))*X if trans = 'T' or 'C' and equed = 'R' or 'B'.
313 *
314 * recip_pivot_growth (output) double*
315 * The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
316 * The infinity norm is used. If recip_pivot_growth is much less
317 * than 1, the stability of the LU factorization could be poor.
318 *
319 * rcond (output) double*
320 * The estimate of the reciprocal condition number of the matrix A
321 * after equilibration (if done). If rcond is less than the machine
322 * precision (in particular, if rcond = 0), the matrix is singular
323 * to working precision. This condition is indicated by a return
324 * code of info > 0.
325 *
326 * FERR (output) double*, dimension (B->ncol)
327 * The estimated forward error bound for each solution vector
328 * X(j) (the j-th column of the solution matrix X).
329 * If XTRUE is the true solution corresponding to X(j), FERR(j)
330 * is an estimated upper bound for the magnitude of the largest
331 * element in (X(j) - XTRUE) divided by the magnitude of the
332 * largest element in X(j). The estimate is as reliable as
333 * the estimate for RCOND, and is almost always a slight
334 * overestimate of the true error.
335 *
336 * BERR (output) double*, dimension (B->ncol)
337 * The componentwise relative backward error of each solution
338 * vector X(j) (i.e., the smallest relative change in
339 * any element of A or B that makes X(j) an exact solution).
340 *
341 * mem_usage (output) mem_usage_t*
342 * Record the memory usage statistics, consisting of following fields:
343 * - for_lu (float)
344 * The amount of space used in bytes for L\U data structures.
345 * - total_needed (float)
346 * The amount of space needed in bytes to perform factorization.
347 * - expansions (int)
348 * The number of memory expansions during the LU factorization.
349 *
350 * info (output) int*
351 * = 0: successful exit
352 * < 0: if info = -i, the i-th argument had an illegal value
353 * > 0: if info = i, and i is
354 * <= A->ncol: U(i,i) is exactly zero. The factorization has
355 * been completed, but the factor U is exactly
356 * singular, so the solution and error bounds
357 * could not be computed.
358 * = A->ncol+1: U is nonsingular, but RCOND is less than machine
359 * precision, meaning that the matrix is singular to
360 * working precision. Nevertheless, the solution and
361 * error bounds are computed because there are a number
362 * of situations where the computed solution can be more
363 * accurate than the value of RCOND would suggest.
364 * > A->ncol+1: number of bytes allocated when memory allocation
365 * failure occurred, plus A->ncol.
366 *
367 */
368
369 DNformat *Bstore, *Xstore;
370 double *Bmat, *Xmat;
371 int ldb, ldx, nrhs;
372 SuperMatrix *AA; /* A in NC format used by the factorization routine.*/
373 SuperMatrix AC; /* Matrix postmultiplied by Pc */
374 int colequ, equil, nofact, notran, rowequ;
375 char trant[1], norm[1];
376 int i, j, info1;
377 double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
378 int relax, panel_size;
379 double diag_pivot_thresh, drop_tol;
380 double t0; /* temporary time */
381 double *utime;
382 extern SuperLUStat_t SuperLUStat;
383
384 /* External functions */
385 extern double dlangs(char *, SuperMatrix *);
386 extern double dlamch_(char *);
387
388 Bstore = B->Store;
389 Xstore = X->Store;
390 Bmat = Bstore->nzval;
391 Xmat = Xstore->nzval;
392 ldb = Bstore->lda;
393 ldx = Xstore->lda;
394 nrhs = B->ncol;
395
396 #if 0
397 printf("dgssvx: fact=%c, trans=%c, refact=%c, equed=%c\n",
398 *fact, *trans, *refact, *equed);
399 #endif
400
401 *info = 0;
402 nofact = lsame_(fact, "N");
403 equil = lsame_(fact, "E");
404 notran = lsame_(trans, "N");
405 if (nofact || equil) {
406 *(unsigned char *)equed = 'N';
407 rowequ = FALSE;
408 colequ = FALSE;
409 } else {
410 rowequ = lsame_(equed, "R") || lsame_(equed, "B");
411 colequ = lsame_(equed, "C") || lsame_(equed, "B");
412 smlnum = dlamch_("Safe minimum");
413 bignum = 1. / smlnum;
414 }
415
416 /* Test the input parameters */
417 if (!nofact && !equil && !lsame_(fact, "F")) *info = -1;
418 else if (!notran && !lsame_(trans, "T") && !lsame_(trans, "C")) *info = -2;
419 else if ( !(lsame_(refact,"Y") || lsame_(refact, "N")) ) *info = -3;
420 else if ( A->nrow != A->ncol || A->nrow < 0 ||
421 (A->Stype != NC && A->Stype != NR) ||
422 A->Dtype != _D || A->Mtype != GE )
423 *info = -4;
424 else if (lsame_(fact, "F") && !(rowequ || colequ || lsame_(equed, "N")))
425 *info = -9;
426 else {
427 if (rowequ) {
428 rcmin = bignum;
429 rcmax = 0.;
430 for (j = 0; j < A->nrow; ++j) {
431 rcmin = MIN(rcmin, R[j]);
432 rcmax = MAX(rcmax, R[j]);
433 }
434 if (rcmin <= 0.) *info = -10;
435 else if ( A->nrow > 0)
436 rowcnd = MAX(rcmin,smlnum) / MIN(rcmax,bignum);
437 else rowcnd = 1.;
438 }
439 if (colequ && *info == 0) {
440 rcmin = bignum;
441 rcmax = 0.;
442 for (j = 0; j < A->nrow; ++j) {
443 rcmin = MIN(rcmin, C[j]);
444 rcmax = MAX(rcmax, C[j]);
445 }
446 if (rcmin <= 0.) *info = -11;
447 else if (A->nrow > 0)
448 colcnd = MAX(rcmin,smlnum) / MIN(rcmax,bignum);
449 else colcnd = 1.;
450 }
451 if (*info == 0) {
452 if ( lwork < -1 ) *info = -15;
453 else if ( B->ncol < 0 || Bstore->lda < MAX(0, A->nrow) ||
454 B->Stype != DN || B->Dtype != _D ||
455 B->Mtype != GE )
456 *info = -16;
457 else if ( X->ncol < 0 || Xstore->lda < MAX(0, A->nrow) ||
458 B->ncol != X->ncol || X->Stype != DN ||
459 X->Dtype != _D || X->Mtype != GE )
460 *info = -17;
461 }
462 }
463 if (*info != 0) {
464 i = -(*info);
465 xerbla_("dgssvx", &i);
466 return;
467 }
468
469 /* Default values for factor_params */
470 panel_size = sp_ienv(1);
471 relax = sp_ienv(2);
472 diag_pivot_thresh = 1.0;
473 drop_tol = 0.0;
474 if ( factor_params != NULL ) {
475 if ( factor_params->panel_size != -1 )
476 panel_size = factor_params->panel_size;
477 if ( factor_params->relax != -1 ) relax = factor_params->relax;
478 if ( factor_params->diag_pivot_thresh != -1 )
479 diag_pivot_thresh = factor_params->diag_pivot_thresh;
480 if ( factor_params->drop_tol != -1 )
481 drop_tol = factor_params->drop_tol;
482 }
483
484 StatInit(panel_size, relax);
485 utime = SuperLUStat.utime;
486
487 /* Convert A to NC format when necessary. */
488 if ( A->Stype == NR ) {
489 NRformat *Astore = A->Store;
490 AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
491 dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz,
492 Astore->nzval, Astore->colind, Astore->rowptr,
493 NC, A->Dtype, A->Mtype);
494 if ( notran ) { /* Reverse the transpose argument. */
495 *trant = 'T';
496 notran = 0;
497 } else {
498 *trant = 'N';
499 notran = 1;
500 }
501 } else { /* A->Stype == NC */
502 *trant = *trans;
503 AA = A;
504 }
505
506 if ( equil ) {
507 t0 = SuperLU_timer_();
508 /* Compute row and column scalings to equilibrate the matrix A. */
509 dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
510
511 if ( info1 == 0 ) {
512 /* Equilibrate matrix A. */
513 dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed);
514 rowequ = lsame_(equed, "R") || lsame_(equed, "B");
515 colequ = lsame_(equed, "C") || lsame_(equed, "B");
516 }
517 utime[EQUIL] = SuperLU_timer_() - t0;
518 }
519
520 /* Scale the right hand side if equilibration was performed. */
521 if ( notran ) {
522 if ( rowequ ) {
523 for (j = 0; j < nrhs; ++j)
524 for (i = 0; i < A->nrow; ++i) {
525 Bmat[i + j*ldb] *= R[i];
526 }
527 }
528 } else if ( colequ ) {
529 for (j = 0; j < nrhs; ++j)
530 for (i = 0; i < A->nrow; ++i) {
531 Bmat[i + j*ldb] *= C[i];
532 }
533 }
534
535 if ( nofact || equil ) {
536
537 t0 = SuperLU_timer_();
538 sp_preorder(refact, AA, perm_c, etree, &AC);
539 utime[ETREE] = SuperLU_timer_() - t0;
540
541 /* printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n",
542 relax, panel_size, sp_ienv(3), sp_ienv(4));
543 fflush(stdout); */
544
545 /* Compute the LU factorization of A*Pc. */
546 t0 = SuperLU_timer_();
547 dgstrf(refact, &AC, diag_pivot_thresh, drop_tol, relax, panel_size,
548 etree, work, lwork, perm_r, perm_c, L, U, info);
549 utime[FACT] = SuperLU_timer_() - t0;
550
551 if ( lwork == -1 ) {
552 mem_usage->total_needed = *info - A->ncol;
553 return;
554 }
555 }
556
557 if ( *info > 0 ) {
558 if ( *info <= A->ncol ) {
559 /* Compute the reciprocal pivot growth factor of the leading
560 rank-deficient *info columns of A. */
561 *recip_pivot_growth = dPivotGrowth(*info, AA, perm_c, L, U);
562 }
563 return;
564 }
565
566 /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
567 *recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U);
568
569 /* Estimate the reciprocal of the condition number of A. */
570 t0 = SuperLU_timer_();
571 if ( notran ) {
572 *(unsigned char *)norm = '1';
573 } else {
574 *(unsigned char *)norm = 'I';
575 }
576 anorm = dlangs(norm, AA);
577 dgscon(norm, L, U, anorm, rcond, info);
578 utime[RCOND] = SuperLU_timer_() - t0;
579
580 /* Compute the solution matrix X. */
581 for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */
582 for (i = 0; i < B->nrow; i++)
583 Xmat[i + j*ldx] = Bmat[i + j*ldb];
584
585 t0 = SuperLU_timer_();
586 dgstrs (trant, L, U, perm_r, perm_c, X, info);
587 utime[SOLVE] = SuperLU_timer_() - t0;
588
589 /* Use iterative refinement to improve the computed solution and compute
590 error bounds and backward error estimates for it. */
591 t0 = SuperLU_timer_();
592 dgsrfs(trant, AA, L, U, perm_r, perm_c, equed, R, C, B,
593 X, ferr, berr, info);
594 utime[REFINE] = SuperLU_timer_() - t0;
595
596 /* Transform the solution matrix X to a solution of the original system. */
597 if ( notran ) {
598 if ( colequ ) {
599 for (j = 0; j < nrhs; ++j)
600 for (i = 0; i < A->nrow; ++i) {
601 Xmat[i + j*ldx] *= C[i];
602 }
603 }
604 } else if ( rowequ ) {
605 for (j = 0; j < nrhs; ++j)
606 for (i = 0; i < A->nrow; ++i) {
607 Xmat[i + j*ldx] *= R[i];
608 }
609 }
610
611 /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */
612 if ( *rcond < dlamch_("E") ) *info = A->ncol + 1;
613
614 dQuerySpace(L, U, panel_size, mem_usage);
615
616 if ( nofact || equil ) Destroy_CompCol_Permuted(&AC);
617 if ( A->Stype == NR ) {
618 Destroy_SuperMatrix_Store(AA);
619 SUPERLU_FREE(AA);
620 }
621
622 PrintStat( &SuperLUStat );
623 StatFree();
624 }