Mercurial > forge
comparison main/sparse/SuperLU/SRC/dgssvx.c @ 0:6b33357c7561 octave-forge
Initial revision
author | pkienzle |
---|---|
date | Wed, 10 Oct 2001 19:54:49 +0000 |
parents | |
children | 7dad48fc229c |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6b33357c7561 |
---|---|
1 | |
2 | |
3 /* | |
4 * -- SuperLU routine (version 2.0) -- | |
5 * Univ. of California Berkeley, Xerox Palo Alto Research Center, | |
6 * and Lawrence Berkeley National Lab. | |
7 * November 15, 1997 | |
8 * | |
9 */ | |
10 #include "dsp_defs.h" | |
11 #include "util.h" | |
12 | |
13 void | |
14 dgssvx(char *fact, char *trans, char *refact, | |
15 SuperMatrix *A, factor_param_t *factor_params, int *perm_c, | |
16 int *perm_r, int *etree, char *equed, double *R, double *C, | |
17 SuperMatrix *L, SuperMatrix *U, void *work, int lwork, | |
18 SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, | |
19 double *rcond, double *ferr, double *berr, | |
20 mem_usage_t *mem_usage, int *info ) | |
21 { | |
22 /* | |
23 * Purpose | |
24 * ======= | |
25 * | |
26 * DGSSVX solves the system of linear equations A*X=B or A'*X=B, using | |
27 * the LU factorization from dgstrf(). Error bounds on the solution and | |
28 * a condition estimate are also provided. It performs the following steps: | |
29 * | |
30 * 1. If A is stored column-wise (A->Stype = NC): | |
31 * | |
32 * 1.1. If fact = 'E', scaling factors are computed to equilibrate the | |
33 * system: | |
34 * trans = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B | |
35 * trans = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B | |
36 * trans = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B | |
37 * Whether or not the system will be equilibrated depends on the | |
38 * scaling of the matrix A, but if equilibration is used, A is | |
39 * overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if trans='N') | |
40 * or diag(C)*B (if trans = 'T' or 'C'). | |
41 * | |
42 * 1.2. Permute columns of A, forming A*Pc, where Pc is a permutation | |
43 * matrix that usually preserves sparsity. | |
44 * For more details of this step, see sp_preorder.c. | |
45 * | |
46 * 1.3. If fact = 'N' or 'E', the LU decomposition is used to factor the | |
47 * matrix A (after equilibration if fact = 'E') as Pr*A*Pc = L*U, | |
48 * with Pr determined by partial pivoting. | |
49 * | |
50 * 1.4. Compute the reciprocal pivot growth factor. | |
51 * | |
52 * 1.5. If some U(i,i) = 0, so that U is exactly singular, then the | |
53 * routine returns with info = i. Otherwise, the factored form of | |
54 * A is used to estimate the condition number of the matrix A. If | |
55 * the reciprocal of the condition number is less than machine | |
56 * precision, info = A->ncol+1 is returned as a warning, but the | |
57 * routine still goes on to solve for X and computes error bounds | |
58 * as described below. | |
59 * | |
60 * 1.6. The system of equations is solved for X using the factored form | |
61 * of A. | |
62 * | |
63 * 1.7. Iterative refinement is applied to improve the computed solution | |
64 * matrix and calculate error bounds and backward error estimates | |
65 * for it. | |
66 * | |
67 * 1.8. If equilibration was used, the matrix X is premultiplied by | |
68 * diag(C) (if trans = 'N') or diag(R) (if trans = 'T' or 'C') so | |
69 * that it solves the original system before equilibration. | |
70 * | |
71 * 2. If A is stored row-wise (A->Stype = NR), apply the above algorithm | |
72 * to the transpose of A: | |
73 * | |
74 * 2.1. If fact = 'E', scaling factors are computed to equilibrate the | |
75 * system: | |
76 * trans = 'N': diag(R)*A'*diag(C) *inv(diag(C))*X = diag(R)*B | |
77 * trans = 'T': (diag(R)*A'*diag(C))**T *inv(diag(R))*X = diag(C)*B | |
78 * trans = 'C': (diag(R)*A'*diag(C))**H *inv(diag(R))*X = diag(C)*B | |
79 * Whether or not the system will be equilibrated depends on the | |
80 * scaling of the matrix A, but if equilibration is used, A' is | |
81 * overwritten by diag(R)*A'*diag(C) and B by diag(R)*B | |
82 * (if trans='N') or diag(C)*B (if trans = 'T' or 'C'). | |
83 * | |
84 * 2.2. Permute columns of transpose(A) (rows of A), | |
85 * forming transpose(A)*Pc, where Pc is a permutation matrix that | |
86 * usually preserves sparsity. | |
87 * For more details of this step, see sp_preorder.c. | |
88 * | |
89 * 2.3. If fact = 'N' or 'E', the LU decomposition is used to factor the | |
90 * transpose(A) (after equilibration if fact = 'E') as | |
91 * Pr*transpose(A)*Pc = L*U with the permutation Pr determined by | |
92 * partial pivoting. | |
93 * | |
94 * 2.4. Compute the reciprocal pivot growth factor. | |
95 * | |
96 * 2.5. If some U(i,i) = 0, so that U is exactly singular, then the | |
97 * routine returns with info = i. Otherwise, the factored form | |
98 * of transpose(A) is used to estimate the condition number of the | |
99 * matrix A. If the reciprocal of the condition number | |
100 * is less than machine precision, info = A->nrow+1 is returned as | |
101 * a warning, but the routine still goes on to solve for X and | |
102 * computes error bounds as described below. | |
103 * | |
104 * 2.6. The system of equations is solved for X using the factored form | |
105 * of transpose(A). | |
106 * | |
107 * 2.7. Iterative refinement is applied to improve the computed solution | |
108 * matrix and calculate error bounds and backward error estimates | |
109 * for it. | |
110 * | |
111 * 2.8. If equilibration was used, the matrix X is premultiplied by | |
112 * diag(C) (if trans = 'N') or diag(R) (if trans = 'T' or 'C') so | |
113 * that it solves the original system before equilibration. | |
114 * | |
115 * See supermatrix.h for the definition of 'SuperMatrix' structure. | |
116 * | |
117 * Arguments | |
118 * ========= | |
119 * | |
120 * fact (input) char* | |
121 * Specifies whether or not the factored form of the matrix | |
122 * A is supplied on entry, and if not, whether the matrix A should | |
123 * be equilibrated before it is factored. | |
124 * = 'F': On entry, L, U, perm_r and perm_c contain the factored | |
125 * form of A. If equed is not 'N', the matrix A has been | |
126 * equilibrated with scaling factors R and C. | |
127 * A, L, U, perm_r are not modified. | |
128 * = 'N': The matrix A will be factored, and the factors will be | |
129 * stored in L and U. | |
130 * = 'E': The matrix A will be equilibrated if necessary, then | |
131 * factored into L and U. | |
132 * | |
133 * trans (input) char* | |
134 * Specifies the form of the system of equations: | |
135 * = 'N': A * X = B (No transpose) | |
136 * = 'T': A**T * X = B (Transpose) | |
137 * = 'C': A**H * X = B (Transpose) | |
138 * | |
139 * refact (input) char* | |
140 * Specifies whether we want to re-factor the matrix. | |
141 * = 'N': Factor the matrix A. | |
142 * = 'Y': Matrix A was factored before, now we want to re-factor | |
143 * matrix A with perm_r and etree as inputs. Use | |
144 * the same storage for the L\U factors previously allocated, | |
145 * expand it if necessary. User should insure to use the same | |
146 * memory model. | |
147 * If fact = 'F', then refact is not accessed. | |
148 * | |
149 * A (input/output) SuperMatrix* | |
150 * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number | |
151 * of the linear equations is A->nrow. Currently, the type of A can be: | |
152 * Stype = NC or NR, Dtype = _D, Mtype = GE. In the future, | |
153 * more general A can be handled. | |
154 * | |
155 * On entry, If fact = 'F' and equed is not 'N', then A must have | |
156 * been equilibrated by the scaling factors in R and/or C. | |
157 * A is not modified if fact = 'F' or 'N', or if fact = 'E' and | |
158 * equed = 'N' on exit. | |
159 * | |
160 * On exit, if fact = 'E' and equed is not 'N', A is scaled as follows: | |
161 * If A->Stype = NC: | |
162 * equed = 'R': A := diag(R) * A | |
163 * equed = 'C': A := A * diag(C) | |
164 * equed = 'B': A := diag(R) * A * diag(C). | |
165 * If A->Stype = NR: | |
166 * equed = 'R': transpose(A) := diag(R) * transpose(A) | |
167 * equed = 'C': transpose(A) := transpose(A) * diag(C) | |
168 * equed = 'B': transpose(A) := diag(R) * transpose(A) * diag(C). | |
169 * | |
170 * factor_params (input) factor_param_t* | |
171 * The structure defines the input scalar parameters, consisting of | |
172 * the following fields. If factor_params = NULL, the default | |
173 * values are used for all the fields; otherwise, the values | |
174 * are given by the user. | |
175 * - panel_size (int): Panel size. A panel consists of at most | |
176 * panel_size consecutive columns. If panel_size = -1, use | |
177 * default value 8. | |
178 * - relax (int): To control degree of relaxing supernodes. If the | |
179 * number of nodes (columns) in a subtree of the elimination | |
180 * tree is less than relax, this subtree is considered as one | |
181 * supernode, regardless of the row structures of those columns. | |
182 * If relax = -1, use default value 8. | |
183 * - diag_pivot_thresh (double): Diagonal pivoting threshold. | |
184 * At step j of the Gaussian elimination, if | |
185 * abs(A_jj) >= diag_pivot_thresh * (max_(i>=j) abs(A_ij)), | |
186 * then use A_jj as pivot. 0 <= diag_pivot_thresh <= 1. | |
187 * If diag_pivot_thresh = -1, use default value 1.0, | |
188 * which corresponds to standard partial pivoting. | |
189 * - drop_tol (double): Drop tolerance threshold. (NOT IMPLEMENTED) | |
190 * At step j of the Gaussian elimination, if | |
191 * abs(A_ij)/(max_i abs(A_ij)) < drop_tol, | |
192 * then drop entry A_ij. 0 <= drop_tol <= 1. | |
193 * If drop_tol = -1, use default value 0.0, which corresponds to | |
194 * standard Gaussian elimination. | |
195 * | |
196 * perm_c (input/output) int* | |
197 * If A->Stype = NC, Column permutation vector of size A->ncol, | |
198 * which defines the permutation matrix Pc; perm_c[i] = j means | |
199 * column i of A is in position j in A*Pc. | |
200 * On exit, perm_c may be overwritten by the product of the input | |
201 * perm_c and a permutation that postorders the elimination tree | |
202 * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree | |
203 * is already in postorder. | |
204 * | |
205 * If A->Stype = NR, column permutation vector of size A->nrow, | |
206 * which describes permutation of columns of transpose(A) | |
207 * (rows of A) as described above. | |
208 * | |
209 * perm_r (input/output) int* | |
210 * If A->Stype = NC, row permutation vector of size A->nrow, | |
211 * which defines the permutation matrix Pr, and is determined | |
212 * by partial pivoting. perm_r[i] = j means row i of A is in | |
213 * position j in Pr*A. | |
214 * | |
215 * If A->Stype = NR, permutation vector of size A->ncol, which | |
216 * determines permutation of rows of transpose(A) | |
217 * (columns of A) as described above. | |
218 * | |
219 * If refact is not 'Y', perm_r is output argument; | |
220 * If refact = 'Y', the pivoting routine will try to use the input | |
221 * perm_r, unless a certain threshold criterion is violated. | |
222 * In that case, perm_r is overwritten by a new permutation | |
223 * determined by partial pivoting or diagonal threshold pivoting. | |
224 * | |
225 * etree (input/output) int*, dimension (A->ncol) | |
226 * Elimination tree of Pc'*A'*A*Pc. | |
227 * If fact is not 'F' and refact = 'Y', etree is an input argument, | |
228 * otherwise it is an output argument. | |
229 * Note: etree is a vector of parent pointers for a forest whose | |
230 * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. | |
231 * | |
232 * equed (input/output) char* | |
233 * Specifies the form of equilibration that was done. | |
234 * = 'N': No equilibration. | |
235 * = 'R': Row equilibration, i.e., A was premultiplied by diag(R). | |
236 * = 'C': Column equilibration, i.e., A was postmultiplied by diag(C). | |
237 * = 'B': Both row and column equilibration, i.e., A was replaced | |
238 * by diag(R)*A*diag(C). | |
239 * If fact = 'F', equed is an input argument, otherwise it is | |
240 * an output argument. | |
241 * | |
242 * R (input/output) double*, dimension (A->nrow) | |
243 * The row scale factors for A or transpose(A). | |
244 * If equed = 'R' or 'B', A (if A->Stype = NC) or transpose(A) (if | |
245 * A->Stype = NR) is multiplied on the left by diag(R). | |
246 * If equed = 'N' or 'C', R is not accessed. | |
247 * If fact = 'F', R is an input argument; otherwise, R is output. | |
248 * If fact = 'F' and equed = 'R' or 'B', each element of R must | |
249 * be positive. | |
250 * | |
251 * C (input/output) double*, dimension (A->ncol) | |
252 * The column scale factors for A or transpose(A). | |
253 * If equed = 'C' or 'B', A (if A->Stype = NC) or transpose(A) (if | |
254 * A->Stype = NR) is multiplied on the right by diag(C). | |
255 * If equed = 'N' or 'R', C is not accessed. | |
256 * If fact = 'F', C is an input argument; otherwise, C is output. | |
257 * If fact = 'F' and equed = 'C' or 'B', each element of C must | |
258 * be positive. | |
259 * | |
260 * L (output) SuperMatrix* | |
261 * The factor L from the factorization | |
262 * Pr*A*Pc=L*U (if A->Stype = NC) or | |
263 * Pr*transpose(A)*Pc=L*U (if A->Stype = NR). | |
264 * Uses compressed row subscripts storage for supernodes, i.e., | |
265 * L has types: Stype = SC, Dtype = _D, Mtype = TRLU. | |
266 * | |
267 * U (output) SuperMatrix* | |
268 * The factor U from the factorization | |
269 * Pr*A*Pc=L*U (if A->Stype = NC) or | |
270 * Pr*transpose(A)*Pc=L*U (if A->Stype = NR). | |
271 * Uses column-wise storage scheme, i.e., U has types: | |
272 * Stype = NC, Dtype = _D, Mtype = TRU. | |
273 * | |
274 * work (workspace/output) void*, size (lwork) (in bytes) | |
275 * User supplied workspace, should be large enough | |
276 * to hold data structures for factors L and U. | |
277 * On exit, if fact is not 'F', L and U point to this array. | |
278 * | |
279 * lwork (input) int | |
280 * Specifies the size of work array in bytes. | |
281 * = 0: allocate space internally by system malloc; | |
282 * > 0: use user-supplied work array of length lwork in bytes, | |
283 * returns error if space runs out. | |
284 * = -1: the routine guesses the amount of space needed without | |
285 * performing the factorization, and returns it in | |
286 * mem_usage->total_needed; no other side effects. | |
287 * | |
288 * See argument 'mem_usage' for memory usage statistics. | |
289 * | |
290 * B (input/output) SuperMatrix* | |
291 * B has types: Stype = DN, Dtype = _D, Mtype = GE. | |
292 * On entry, the right hand side matrix. | |
293 * On exit, | |
294 * if equed = 'N', B is not modified; otherwise | |
295 * if A->Stype = NC: | |
296 * if trans = 'N' and equed = 'R' or 'B', B is overwritten by | |
297 * diag(R)*B; | |
298 * if trans = 'T' or 'C' and equed = 'C' of 'B', B is | |
299 * overwritten by diag(C)*B; | |
300 * if A->Stype = NR: | |
301 * if trans = 'N' and equed = 'C' or 'B', B is overwritten by | |
302 * diag(C)*B; | |
303 * if trans = 'T' or 'C' and equed = 'R' of 'B', B is | |
304 * overwritten by diag(R)*B. | |
305 * | |
306 * X (output) SuperMatrix* | |
307 * X has types: Stype = DN, Dtype = _D, Mtype = GE. | |
308 * If info = 0 or info = A->ncol+1, X contains the solution matrix | |
309 * to the original system of equations. Note that A and B are modified | |
310 * on exit if equed is not 'N', and the solution to the equilibrated | |
311 * system is inv(diag(C))*X if trans = 'N' and equed = 'C' or 'B', | |
312 * or inv(diag(R))*X if trans = 'T' or 'C' and equed = 'R' or 'B'. | |
313 * | |
314 * recip_pivot_growth (output) double* | |
315 * The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ). | |
316 * The infinity norm is used. If recip_pivot_growth is much less | |
317 * than 1, the stability of the LU factorization could be poor. | |
318 * | |
319 * rcond (output) double* | |
320 * The estimate of the reciprocal condition number of the matrix A | |
321 * after equilibration (if done). If rcond is less than the machine | |
322 * precision (in particular, if rcond = 0), the matrix is singular | |
323 * to working precision. This condition is indicated by a return | |
324 * code of info > 0. | |
325 * | |
326 * FERR (output) double*, dimension (B->ncol) | |
327 * The estimated forward error bound for each solution vector | |
328 * X(j) (the j-th column of the solution matrix X). | |
329 * If XTRUE is the true solution corresponding to X(j), FERR(j) | |
330 * is an estimated upper bound for the magnitude of the largest | |
331 * element in (X(j) - XTRUE) divided by the magnitude of the | |
332 * largest element in X(j). The estimate is as reliable as | |
333 * the estimate for RCOND, and is almost always a slight | |
334 * overestimate of the true error. | |
335 * | |
336 * BERR (output) double*, dimension (B->ncol) | |
337 * The componentwise relative backward error of each solution | |
338 * vector X(j) (i.e., the smallest relative change in | |
339 * any element of A or B that makes X(j) an exact solution). | |
340 * | |
341 * mem_usage (output) mem_usage_t* | |
342 * Record the memory usage statistics, consisting of following fields: | |
343 * - for_lu (float) | |
344 * The amount of space used in bytes for L\U data structures. | |
345 * - total_needed (float) | |
346 * The amount of space needed in bytes to perform factorization. | |
347 * - expansions (int) | |
348 * The number of memory expansions during the LU factorization. | |
349 * | |
350 * info (output) int* | |
351 * = 0: successful exit | |
352 * < 0: if info = -i, the i-th argument had an illegal value | |
353 * > 0: if info = i, and i is | |
354 * <= A->ncol: U(i,i) is exactly zero. The factorization has | |
355 * been completed, but the factor U is exactly | |
356 * singular, so the solution and error bounds | |
357 * could not be computed. | |
358 * = A->ncol+1: U is nonsingular, but RCOND is less than machine | |
359 * precision, meaning that the matrix is singular to | |
360 * working precision. Nevertheless, the solution and | |
361 * error bounds are computed because there are a number | |
362 * of situations where the computed solution can be more | |
363 * accurate than the value of RCOND would suggest. | |
364 * > A->ncol+1: number of bytes allocated when memory allocation | |
365 * failure occurred, plus A->ncol. | |
366 * | |
367 */ | |
368 | |
369 DNformat *Bstore, *Xstore; | |
370 double *Bmat, *Xmat; | |
371 int ldb, ldx, nrhs; | |
372 SuperMatrix *AA; /* A in NC format used by the factorization routine.*/ | |
373 SuperMatrix AC; /* Matrix postmultiplied by Pc */ | |
374 int colequ, equil, nofact, notran, rowequ; | |
375 char trant[1], norm[1]; | |
376 int i, j, info1; | |
377 double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; | |
378 int relax, panel_size; | |
379 double diag_pivot_thresh, drop_tol; | |
380 double t0; /* temporary time */ | |
381 double *utime; | |
382 extern SuperLUStat_t SuperLUStat; | |
383 | |
384 /* External functions */ | |
385 extern double dlangs(char *, SuperMatrix *); | |
386 extern double dlamch_(char *); | |
387 | |
388 Bstore = B->Store; | |
389 Xstore = X->Store; | |
390 Bmat = Bstore->nzval; | |
391 Xmat = Xstore->nzval; | |
392 ldb = Bstore->lda; | |
393 ldx = Xstore->lda; | |
394 nrhs = B->ncol; | |
395 | |
396 #if 0 | |
397 printf("dgssvx: fact=%c, trans=%c, refact=%c, equed=%c\n", | |
398 *fact, *trans, *refact, *equed); | |
399 #endif | |
400 | |
401 *info = 0; | |
402 nofact = lsame_(fact, "N"); | |
403 equil = lsame_(fact, "E"); | |
404 notran = lsame_(trans, "N"); | |
405 if (nofact || equil) { | |
406 *(unsigned char *)equed = 'N'; | |
407 rowequ = FALSE; | |
408 colequ = FALSE; | |
409 } else { | |
410 rowequ = lsame_(equed, "R") || lsame_(equed, "B"); | |
411 colequ = lsame_(equed, "C") || lsame_(equed, "B"); | |
412 smlnum = dlamch_("Safe minimum"); | |
413 bignum = 1. / smlnum; | |
414 } | |
415 | |
416 /* Test the input parameters */ | |
417 if (!nofact && !equil && !lsame_(fact, "F")) *info = -1; | |
418 else if (!notran && !lsame_(trans, "T") && !lsame_(trans, "C")) *info = -2; | |
419 else if ( !(lsame_(refact,"Y") || lsame_(refact, "N")) ) *info = -3; | |
420 else if ( A->nrow != A->ncol || A->nrow < 0 || | |
421 (A->Stype != NC && A->Stype != NR) || | |
422 A->Dtype != _D || A->Mtype != GE ) | |
423 *info = -4; | |
424 else if (lsame_(fact, "F") && !(rowequ || colequ || lsame_(equed, "N"))) | |
425 *info = -9; | |
426 else { | |
427 if (rowequ) { | |
428 rcmin = bignum; | |
429 rcmax = 0.; | |
430 for (j = 0; j < A->nrow; ++j) { | |
431 rcmin = MIN(rcmin, R[j]); | |
432 rcmax = MAX(rcmax, R[j]); | |
433 } | |
434 if (rcmin <= 0.) *info = -10; | |
435 else if ( A->nrow > 0) | |
436 rowcnd = MAX(rcmin,smlnum) / MIN(rcmax,bignum); | |
437 else rowcnd = 1.; | |
438 } | |
439 if (colequ && *info == 0) { | |
440 rcmin = bignum; | |
441 rcmax = 0.; | |
442 for (j = 0; j < A->nrow; ++j) { | |
443 rcmin = MIN(rcmin, C[j]); | |
444 rcmax = MAX(rcmax, C[j]); | |
445 } | |
446 if (rcmin <= 0.) *info = -11; | |
447 else if (A->nrow > 0) | |
448 colcnd = MAX(rcmin,smlnum) / MIN(rcmax,bignum); | |
449 else colcnd = 1.; | |
450 } | |
451 if (*info == 0) { | |
452 if ( lwork < -1 ) *info = -15; | |
453 else if ( B->ncol < 0 || Bstore->lda < MAX(0, A->nrow) || | |
454 B->Stype != DN || B->Dtype != _D || | |
455 B->Mtype != GE ) | |
456 *info = -16; | |
457 else if ( X->ncol < 0 || Xstore->lda < MAX(0, A->nrow) || | |
458 B->ncol != X->ncol || X->Stype != DN || | |
459 X->Dtype != _D || X->Mtype != GE ) | |
460 *info = -17; | |
461 } | |
462 } | |
463 if (*info != 0) { | |
464 i = -(*info); | |
465 xerbla_("dgssvx", &i); | |
466 return; | |
467 } | |
468 | |
469 /* Default values for factor_params */ | |
470 panel_size = sp_ienv(1); | |
471 relax = sp_ienv(2); | |
472 diag_pivot_thresh = 1.0; | |
473 drop_tol = 0.0; | |
474 if ( factor_params != NULL ) { | |
475 if ( factor_params->panel_size != -1 ) | |
476 panel_size = factor_params->panel_size; | |
477 if ( factor_params->relax != -1 ) relax = factor_params->relax; | |
478 if ( factor_params->diag_pivot_thresh != -1 ) | |
479 diag_pivot_thresh = factor_params->diag_pivot_thresh; | |
480 if ( factor_params->drop_tol != -1 ) | |
481 drop_tol = factor_params->drop_tol; | |
482 } | |
483 | |
484 StatInit(panel_size, relax); | |
485 utime = SuperLUStat.utime; | |
486 | |
487 /* Convert A to NC format when necessary. */ | |
488 if ( A->Stype == NR ) { | |
489 NRformat *Astore = A->Store; | |
490 AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); | |
491 dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, | |
492 Astore->nzval, Astore->colind, Astore->rowptr, | |
493 NC, A->Dtype, A->Mtype); | |
494 if ( notran ) { /* Reverse the transpose argument. */ | |
495 *trant = 'T'; | |
496 notran = 0; | |
497 } else { | |
498 *trant = 'N'; | |
499 notran = 1; | |
500 } | |
501 } else { /* A->Stype == NC */ | |
502 *trant = *trans; | |
503 AA = A; | |
504 } | |
505 | |
506 if ( equil ) { | |
507 t0 = SuperLU_timer_(); | |
508 /* Compute row and column scalings to equilibrate the matrix A. */ | |
509 dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); | |
510 | |
511 if ( info1 == 0 ) { | |
512 /* Equilibrate matrix A. */ | |
513 dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed); | |
514 rowequ = lsame_(equed, "R") || lsame_(equed, "B"); | |
515 colequ = lsame_(equed, "C") || lsame_(equed, "B"); | |
516 } | |
517 utime[EQUIL] = SuperLU_timer_() - t0; | |
518 } | |
519 | |
520 /* Scale the right hand side if equilibration was performed. */ | |
521 if ( notran ) { | |
522 if ( rowequ ) { | |
523 for (j = 0; j < nrhs; ++j) | |
524 for (i = 0; i < A->nrow; ++i) { | |
525 Bmat[i + j*ldb] *= R[i]; | |
526 } | |
527 } | |
528 } else if ( colequ ) { | |
529 for (j = 0; j < nrhs; ++j) | |
530 for (i = 0; i < A->nrow; ++i) { | |
531 Bmat[i + j*ldb] *= C[i]; | |
532 } | |
533 } | |
534 | |
535 if ( nofact || equil ) { | |
536 | |
537 t0 = SuperLU_timer_(); | |
538 sp_preorder(refact, AA, perm_c, etree, &AC); | |
539 utime[ETREE] = SuperLU_timer_() - t0; | |
540 | |
541 /* printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", | |
542 relax, panel_size, sp_ienv(3), sp_ienv(4)); | |
543 fflush(stdout); */ | |
544 | |
545 /* Compute the LU factorization of A*Pc. */ | |
546 t0 = SuperLU_timer_(); | |
547 dgstrf(refact, &AC, diag_pivot_thresh, drop_tol, relax, panel_size, | |
548 etree, work, lwork, perm_r, perm_c, L, U, info); | |
549 utime[FACT] = SuperLU_timer_() - t0; | |
550 | |
551 if ( lwork == -1 ) { | |
552 mem_usage->total_needed = *info - A->ncol; | |
553 return; | |
554 } | |
555 } | |
556 | |
557 if ( *info > 0 ) { | |
558 if ( *info <= A->ncol ) { | |
559 /* Compute the reciprocal pivot growth factor of the leading | |
560 rank-deficient *info columns of A. */ | |
561 *recip_pivot_growth = dPivotGrowth(*info, AA, perm_c, L, U); | |
562 } | |
563 return; | |
564 } | |
565 | |
566 /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */ | |
567 *recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U); | |
568 | |
569 /* Estimate the reciprocal of the condition number of A. */ | |
570 t0 = SuperLU_timer_(); | |
571 if ( notran ) { | |
572 *(unsigned char *)norm = '1'; | |
573 } else { | |
574 *(unsigned char *)norm = 'I'; | |
575 } | |
576 anorm = dlangs(norm, AA); | |
577 dgscon(norm, L, U, anorm, rcond, info); | |
578 utime[RCOND] = SuperLU_timer_() - t0; | |
579 | |
580 /* Compute the solution matrix X. */ | |
581 for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ | |
582 for (i = 0; i < B->nrow; i++) | |
583 Xmat[i + j*ldx] = Bmat[i + j*ldb]; | |
584 | |
585 t0 = SuperLU_timer_(); | |
586 dgstrs (trant, L, U, perm_r, perm_c, X, info); | |
587 utime[SOLVE] = SuperLU_timer_() - t0; | |
588 | |
589 /* Use iterative refinement to improve the computed solution and compute | |
590 error bounds and backward error estimates for it. */ | |
591 t0 = SuperLU_timer_(); | |
592 dgsrfs(trant, AA, L, U, perm_r, perm_c, equed, R, C, B, | |
593 X, ferr, berr, info); | |
594 utime[REFINE] = SuperLU_timer_() - t0; | |
595 | |
596 /* Transform the solution matrix X to a solution of the original system. */ | |
597 if ( notran ) { | |
598 if ( colequ ) { | |
599 for (j = 0; j < nrhs; ++j) | |
600 for (i = 0; i < A->nrow; ++i) { | |
601 Xmat[i + j*ldx] *= C[i]; | |
602 } | |
603 } | |
604 } else if ( rowequ ) { | |
605 for (j = 0; j < nrhs; ++j) | |
606 for (i = 0; i < A->nrow; ++i) { | |
607 Xmat[i + j*ldx] *= R[i]; | |
608 } | |
609 } | |
610 | |
611 /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */ | |
612 if ( *rcond < dlamch_("E") ) *info = A->ncol + 1; | |
613 | |
614 dQuerySpace(L, U, panel_size, mem_usage); | |
615 | |
616 if ( nofact || equil ) Destroy_CompCol_Permuted(&AC); | |
617 if ( A->Stype == NR ) { | |
618 Destroy_SuperMatrix_Store(AA); | |
619 SUPERLU_FREE(AA); | |
620 } | |
621 | |
622 PrintStat( &SuperLUStat ); | |
623 StatFree(); | |
624 } |