0
|
1 |
|
2 |
|
3 /* |
|
4 * -- SuperLU routine (version 2.0) -- |
|
5 * Univ. of California Berkeley, Xerox Palo Alto Research Center, |
|
6 * and Lawrence Berkeley National Lab. |
|
7 * November 15, 1997 |
|
8 * |
|
9 */ |
|
10 /* |
|
11 * File name: zgsrfs.c |
|
12 * History: Modified from lapack routine ZGERFS |
|
13 */ |
|
14 #include <math.h> |
|
15 #include "zsp_defs.h" |
|
16 #include "util.h" |
|
17 |
|
18 void |
|
19 zgsrfs(char *trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, |
|
20 int *perm_r, int *perm_c, char *equed, double *R, double *C, |
|
21 SuperMatrix *B, SuperMatrix *X, |
|
22 double *ferr, double *berr, int *info) |
|
23 { |
|
24 /* |
|
25 * Purpose |
|
26 * ======= |
|
27 * |
|
28 * ZGSRFS improves the computed solution to a system of linear |
|
29 * equations and provides error bounds and backward error estimates for |
|
30 * the solution. |
|
31 * |
|
32 * If equilibration was performed, the system becomes: |
|
33 * (diag(R)*A_original*diag(C)) * X = diag(R)*B_original. |
|
34 * |
|
35 * See supermatrix.h for the definition of 'SuperMatrix' structure. |
|
36 * |
|
37 * Arguments |
|
38 * ========= |
|
39 * |
|
40 * trans (input) char* |
|
41 * Specifies the form of the system of equations: |
|
42 * = 'N': A * X = B (No transpose) |
|
43 * = 'T': A**T * X = B (Transpose) |
|
44 * = 'C': A**H * X = B (Conjugate transpose = Transpose) |
|
45 * |
|
46 * A (input) SuperMatrix* |
|
47 * The original matrix A in the system, or the scaled A if |
|
48 * equilibration was done. The type of A can be: |
|
49 * Stype = NC, Dtype = _Z, Mtype = GE. |
|
50 * |
|
51 * L (input) SuperMatrix* |
|
52 * The factor L from the factorization Pr*A*Pc=L*U. Use |
|
53 * compressed row subscripts storage for supernodes, |
|
54 * i.e., L has types: Stype = SC, Dtype = _Z, Mtype = TRLU. |
|
55 * |
|
56 * U (input) SuperMatrix* |
|
57 * The factor U from the factorization Pr*A*Pc=L*U as computed by |
|
58 * zgstrf(). Use column-wise storage scheme, |
|
59 * i.e., U has types: Stype = NC, Dtype = _Z, Mtype = TRU. |
|
60 * |
|
61 * perm_r (input) int*, dimension (A->nrow) |
|
62 * Row permutation vector, which defines the permutation matrix Pr; |
|
63 * perm_r[i] = j means row i of A is in position j in Pr*A. |
|
64 * |
|
65 * perm_c (input) int*, dimension (A->ncol) |
|
66 * Column permutation vector, which defines the |
|
67 * permutation matrix Pc; perm_c[i] = j means column i of A is |
|
68 * in position j in A*Pc. |
|
69 * |
|
70 * equed (input) Specifies the form of equilibration that was done. |
|
71 * = 'N': No equilibration. |
|
72 * = 'R': Row equilibration, i.e., A was premultiplied by diag(R). |
|
73 * = 'C': Column equilibration, i.e., A was postmultiplied by |
|
74 * diag(C). |
|
75 * = 'B': Both row and column equilibration, i.e., A was replaced |
|
76 * by diag(R)*A*diag(C). |
|
77 * |
|
78 * R (input) double*, dimension (A->nrow) |
|
79 * The row scale factors for A. |
|
80 * If equed = 'R' or 'B', A is premultiplied by diag(R). |
|
81 * If equed = 'N' or 'C', R is not accessed. |
|
82 * |
|
83 * C (input) double*, dimension (A->ncol) |
|
84 * The column scale factors for A. |
|
85 * If equed = 'C' or 'B', A is postmultiplied by diag(C). |
|
86 * If equed = 'N' or 'R', C is not accessed. |
|
87 * |
|
88 * B (input) SuperMatrix* |
|
89 * B has types: Stype = DN, Dtype = _Z, Mtype = GE. |
|
90 * The right hand side matrix B. |
|
91 * if equed = 'R' or 'B', B is premultiplied by diag(R). |
|
92 * |
|
93 * X (input/output) SuperMatrix* |
|
94 * X has types: Stype = DN, Dtype = _Z, Mtype = GE. |
|
95 * On entry, the solution matrix X, as computed by zgstrs(). |
|
96 * On exit, the improved solution matrix X. |
|
97 * if *equed = 'C' or 'B', X should be premultiplied by diag(C) |
|
98 * in order to obtain the solution to the original system. |
|
99 * |
|
100 * FERR (output) double*, dimension (B->ncol) |
|
101 * The estimated forward error bound for each solution vector |
|
102 * X(j) (the j-th column of the solution matrix X). |
|
103 * If XTRUE is the true solution corresponding to X(j), FERR(j) |
|
104 * is an estimated upper bound for the magnitude of the largest |
|
105 * element in (X(j) - XTRUE) divided by the magnitude of the |
|
106 * largest element in X(j). The estimate is as reliable as |
|
107 * the estimate for RCOND, and is almost always a slight |
|
108 * overestimate of the true error. |
|
109 * |
|
110 * BERR (output) double*, dimension (B->ncol) |
|
111 * The componentwise relative backward error of each solution |
|
112 * vector X(j) (i.e., the smallest relative change in |
|
113 * any element of A or B that makes X(j) an exact solution). |
|
114 * |
|
115 * info (output) int* |
|
116 * = 0: successful exit |
|
117 * < 0: if INFO = -i, the i-th argument had an illegal value |
|
118 * |
|
119 * Internal Parameters |
|
120 * =================== |
|
121 * |
|
122 * ITMAX is the maximum number of steps of iterative refinement. |
|
123 * |
|
124 */ |
|
125 |
|
126 #define ITMAX 5 |
|
127 |
|
128 /* Table of constant values */ |
|
129 int ione = 1; |
|
130 doublecomplex ndone = {-1., 0.}; |
|
131 doublecomplex done = {1., 0.}; |
|
132 |
|
133 /* Local variables */ |
|
134 NCformat *Astore; |
|
135 doublecomplex *Aval; |
|
136 SuperMatrix Bjcol; |
|
137 DNformat *Bstore, *Xstore, *Bjcol_store; |
|
138 doublecomplex *Bmat, *Xmat, *Bptr, *Xptr; |
|
139 int kase; |
|
140 double safe1, safe2; |
|
141 int i, j, k, irow, nz, count, notran, rowequ, colequ; |
|
142 int ldb, ldx, nrhs; |
|
143 double s, xk, lstres, eps, safmin; |
|
144 char transt[1]; |
|
145 doublecomplex *work; |
|
146 double *rwork; |
|
147 int *iwork; |
|
148 extern double dlamch_(char *); |
|
149 extern int zlacon_(int *, doublecomplex *, doublecomplex *, double *, int *); |
|
150 #ifdef _CRAY |
|
151 extern int CCOPY(int *, doublecomplex *, int *, doublecomplex *, int *); |
|
152 extern int CSAXPY(int *, doublecomplex *, doublecomplex *, int *, doublecomplex *, int *); |
|
153 #else |
|
154 extern int zcopy_(int *, doublecomplex *, int *, doublecomplex *, int *); |
|
155 extern int zaxpy_(int *, doublecomplex *, doublecomplex *, int *, doublecomplex *, int *); |
|
156 #endif |
|
157 |
|
158 Astore = A->Store; |
|
159 Aval = Astore->nzval; |
|
160 Bstore = B->Store; |
|
161 Xstore = X->Store; |
|
162 Bmat = Bstore->nzval; |
|
163 Xmat = Xstore->nzval; |
|
164 ldb = Bstore->lda; |
|
165 ldx = Xstore->lda; |
|
166 nrhs = B->ncol; |
|
167 |
|
168 /* Test the input parameters */ |
|
169 *info = 0; |
|
170 notran = lsame_(trans, "N"); |
|
171 if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) *info = -1; |
|
172 else if ( A->nrow != A->ncol || A->nrow < 0 || |
|
173 A->Stype != NC || A->Dtype != _Z || A->Mtype != GE ) |
|
174 *info = -2; |
|
175 else if ( L->nrow != L->ncol || L->nrow < 0 || |
|
176 L->Stype != SC || L->Dtype != _Z || L->Mtype != TRLU ) |
|
177 *info = -3; |
|
178 else if ( U->nrow != U->ncol || U->nrow < 0 || |
|
179 U->Stype != NC || U->Dtype != _Z || U->Mtype != TRU ) |
|
180 *info = -4; |
|
181 else if ( ldb < MAX(0, A->nrow) || |
|
182 B->Stype != DN || B->Dtype != _Z || B->Mtype != GE ) |
|
183 *info = -10; |
|
184 else if ( ldx < MAX(0, A->nrow) || |
|
185 X->Stype != DN || X->Dtype != _Z || X->Mtype != GE ) |
|
186 *info = -11; |
|
187 if (*info != 0) { |
|
188 i = -(*info); |
|
189 xerbla_("zgsrfs", &i); |
|
190 return; |
|
191 } |
|
192 |
|
193 /* Quick return if possible */ |
|
194 if ( A->nrow == 0 || nrhs == 0) { |
|
195 for (j = 0; j < nrhs; ++j) { |
|
196 ferr[j] = 0.; |
|
197 berr[j] = 0.; |
|
198 } |
|
199 return; |
|
200 } |
|
201 |
|
202 rowequ = lsame_(equed, "R") || lsame_(equed, "B"); |
|
203 colequ = lsame_(equed, "C") || lsame_(equed, "B"); |
|
204 |
|
205 /* Allocate working space */ |
|
206 work = doublecomplexMalloc(2*A->nrow); |
|
207 rwork = (double *) SUPERLU_MALLOC( A->nrow * sizeof(double) ); |
|
208 iwork = intMalloc(A->nrow); |
|
209 if ( !work || !rwork || !iwork ) |
|
210 ABORT("Malloc fails for work/rwork/iwork."); |
|
211 |
|
212 if ( notran ) { |
|
213 *(unsigned char *)transt = 'T'; |
|
214 } else { |
|
215 *(unsigned char *)transt = 'N'; |
|
216 } |
|
217 |
|
218 /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ |
|
219 nz = A->ncol + 1; |
|
220 eps = dlamch_("Epsilon"); |
|
221 safmin = dlamch_("Safe minimum"); |
|
222 safe1 = nz * safmin; |
|
223 safe2 = safe1 / eps; |
|
224 |
|
225 /* Compute the number of nonzeros in each row (or column) of A */ |
|
226 for (i = 0; i < A->nrow; ++i) iwork[i] = 0; |
|
227 if ( notran ) { |
|
228 for (k = 0; k < A->ncol; ++k) |
|
229 for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) |
|
230 ++iwork[Astore->rowind[i]]; |
|
231 } else { |
|
232 for (k = 0; k < A->ncol; ++k) |
|
233 iwork[k] = Astore->colptr[k+1] - Astore->colptr[k]; |
|
234 } |
|
235 |
|
236 /* Copy one column of RHS B into Bjcol. */ |
|
237 Bjcol.Stype = B->Stype; |
|
238 Bjcol.Dtype = B->Dtype; |
|
239 Bjcol.Mtype = B->Mtype; |
|
240 Bjcol.nrow = B->nrow; |
|
241 Bjcol.ncol = 1; |
|
242 Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) ); |
|
243 if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store"); |
|
244 Bjcol_store = Bjcol.Store; |
|
245 Bjcol_store->lda = ldb; |
|
246 Bjcol_store->nzval = work; /* address aliasing */ |
|
247 |
|
248 /* Do for each right hand side ... */ |
|
249 for (j = 0; j < nrhs; ++j) { |
|
250 count = 0; |
|
251 lstres = 3.; |
|
252 Bptr = &Bmat[j*ldb]; |
|
253 Xptr = &Xmat[j*ldx]; |
|
254 |
|
255 while (1) { /* Loop until stopping criterion is satisfied. */ |
|
256 |
|
257 /* Compute residual R = B - op(A) * X, |
|
258 where op(A) = A, A**T, or A**H, depending on TRANS. */ |
|
259 |
|
260 #ifdef _CRAY |
|
261 CCOPY(&A->nrow, Bptr, &ione, work, &ione); |
|
262 #else |
|
263 zcopy_(&A->nrow, Bptr, &ione, work, &ione); |
|
264 #endif |
|
265 sp_zgemv(trans, ndone, A, Xptr, ione, done, work, ione); |
|
266 |
|
267 /* Compute componentwise relative backward error from formula |
|
268 max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) |
|
269 where abs(Z) is the componentwise absolute value of the matrix |
|
270 or vector Z. If the i-th component of the denominator is less |
|
271 than SAFE2, then SAFE1 is added to the i-th component of the |
|
272 numerator and denominator before dividing. */ |
|
273 |
|
274 for (i = 0; i < A->nrow; ++i) rwork[i] = z_abs1( &Bptr[i] ); |
|
275 |
|
276 /* Compute abs(op(A))*abs(X) + abs(B). */ |
|
277 if (notran) { |
|
278 for (k = 0; k < A->ncol; ++k) { |
|
279 xk = z_abs1( &Xptr[k] ); |
|
280 for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) |
|
281 rwork[Astore->rowind[i]] += z_abs1(&Aval[i]) * xk; |
|
282 } |
|
283 } else { |
|
284 for (k = 0; k < A->ncol; ++k) { |
|
285 s = 0.; |
|
286 for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { |
|
287 irow = Astore->rowind[i]; |
|
288 s += z_abs1(&Aval[i]) * z_abs1(&Xptr[irow]); |
|
289 } |
|
290 rwork[k] += s; |
|
291 } |
|
292 } |
|
293 s = 0.; |
|
294 for (i = 0; i < A->nrow; ++i) { |
|
295 if (rwork[i] > safe2) |
|
296 s = MAX( s, z_abs1(&work[i]) / rwork[i] ); |
|
297 else |
|
298 s = MAX( s, (z_abs1(&work[i]) + safe1) / |
|
299 (rwork[i] + safe1) ); |
|
300 } |
|
301 berr[j] = s; |
|
302 |
|
303 /* Test stopping criterion. Continue iterating if |
|
304 1) The residual BERR(J) is larger than machine epsilon, and |
|
305 2) BERR(J) decreased by at least a factor of 2 during the |
|
306 last iteration, and |
|
307 3) At most ITMAX iterations tried. */ |
|
308 |
|
309 if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) { |
|
310 /* Update solution and try again. */ |
|
311 zgstrs (trans, L, U, perm_r, perm_c, &Bjcol, info); |
|
312 |
|
313 #ifdef _CRAY |
|
314 CAXPY(&A->nrow, &done, work, &ione, |
|
315 &Xmat[j*ldx], &ione); |
|
316 #else |
|
317 zaxpy_(&A->nrow, &done, work, &ione, |
|
318 &Xmat[j*ldx], &ione); |
|
319 #endif |
|
320 lstres = berr[j]; |
|
321 ++count; |
|
322 } else { |
|
323 break; |
|
324 } |
|
325 |
|
326 } /* end while */ |
|
327 |
|
328 /* Bound error from formula: |
|
329 norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))* |
|
330 ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) |
|
331 where |
|
332 norm(Z) is the magnitude of the largest component of Z |
|
333 inv(op(A)) is the inverse of op(A) |
|
334 abs(Z) is the componentwise absolute value of the matrix or |
|
335 vector Z |
|
336 NZ is the maximum number of nonzeros in any row of A, plus 1 |
|
337 EPS is machine epsilon |
|
338 |
|
339 The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) |
|
340 is incremented by SAFE1 if the i-th component of |
|
341 abs(op(A))*abs(X) + abs(B) is less than SAFE2. |
|
342 |
|
343 Use ZLACON to estimate the infinity-norm of the matrix |
|
344 inv(op(A)) * diag(W), |
|
345 where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ |
|
346 |
|
347 for (i = 0; i < A->nrow; ++i) rwork[i] = z_abs1( &Bptr[i] ); |
|
348 |
|
349 /* Compute abs(op(A))*abs(X) + abs(B). */ |
|
350 if ( notran ) { |
|
351 for (k = 0; k < A->ncol; ++k) { |
|
352 xk = z_abs1( &Xptr[k] ); |
|
353 for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) |
|
354 rwork[Astore->rowind[i]] += z_abs1(&Aval[i]) * xk; |
|
355 } |
|
356 } else { |
|
357 for (k = 0; k < A->ncol; ++k) { |
|
358 s = 0.; |
|
359 for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { |
|
360 irow = Astore->rowind[i]; |
|
361 xk = z_abs1( &Xptr[irow] ); |
|
362 s += z_abs1(&Aval[i]) * xk; |
|
363 } |
|
364 rwork[k] += s; |
|
365 } |
|
366 } |
|
367 |
|
368 for (i = 0; i < A->nrow; ++i) |
|
369 if (rwork[i] > safe2) |
|
370 rwork[i] = z_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i]; |
|
371 else |
|
372 rwork[i] = z_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1; |
|
373 kase = 0; |
|
374 |
|
375 do { |
|
376 zlacon_(&A->nrow, &work[A->nrow], work, |
|
377 &ferr[j], &kase); |
|
378 if (kase == 0) break; |
|
379 |
|
380 if (kase == 1) { |
|
381 /* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */ |
|
382 if ( notran && colequ ) |
|
383 for (i = 0; i < A->ncol; ++i) { |
|
384 zd_mult(&work[i], &work[i], C[i]); |
|
385 } |
|
386 else if ( !notran && rowequ ) |
|
387 for (i = 0; i < A->nrow; ++i) { |
|
388 zd_mult(&work[i], &work[i], R[i]); |
|
389 } |
|
390 |
|
391 zgstrs (transt, L, U, perm_r, perm_c, &Bjcol, info); |
|
392 |
|
393 for (i = 0; i < A->nrow; ++i) { |
|
394 zd_mult(&work[i], &work[i], rwork[i]); |
|
395 } |
|
396 } else { |
|
397 /* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */ |
|
398 for (i = 0; i < A->nrow; ++i) { |
|
399 zd_mult(&work[i], &work[i], rwork[i]); |
|
400 } |
|
401 |
|
402 zgstrs (trans, L, U, perm_r, perm_c, &Bjcol, info); |
|
403 |
|
404 if ( notran && colequ ) |
|
405 for (i = 0; i < A->ncol; ++i) { |
|
406 zd_mult(&work[i], &work[i], C[i]); |
|
407 } |
|
408 else if ( !notran && rowequ ) |
|
409 for (i = 0; i < A->ncol; ++i) { |
|
410 zd_mult(&work[i], &work[i], R[i]); |
|
411 } |
|
412 } |
|
413 |
|
414 } while ( kase != 0 ); |
|
415 |
|
416 /* Normalize error. */ |
|
417 lstres = 0.; |
|
418 if ( notran && colequ ) { |
|
419 for (i = 0; i < A->nrow; ++i) |
|
420 lstres = MAX( lstres, C[i] * z_abs1( &Xptr[i]) ); |
|
421 } else if ( !notran && rowequ ) { |
|
422 for (i = 0; i < A->nrow; ++i) |
|
423 lstres = MAX( lstres, R[i] * z_abs1( &Xptr[i]) ); |
|
424 } else { |
|
425 for (i = 0; i < A->nrow; ++i) |
|
426 lstres = MAX( lstres, z_abs1( &Xptr[i]) ); |
|
427 } |
|
428 if ( lstres != 0. ) |
|
429 ferr[j] /= lstres; |
|
430 |
|
431 } /* for each RHS j ... */ |
|
432 |
|
433 SUPERLU_FREE(work); |
|
434 SUPERLU_FREE(rwork); |
|
435 SUPERLU_FREE(iwork); |
|
436 SUPERLU_FREE(Bjcol.Store); |
|
437 |
|
438 return; |
|
439 |
|
440 } /* zgsrfs */ |