0
|
1 |
|
2 |
|
3 /* |
|
4 * -- SuperLU routine (version 2.0) -- |
|
5 * Univ. of California Berkeley, Xerox Palo Alto Research Center, |
|
6 * and Lawrence Berkeley National Lab. |
|
7 * November 15, 1997 |
|
8 * |
|
9 */ |
|
10 /* |
|
11 Copyright (c) 1994 by Xerox Corporation. All rights reserved. |
|
12 |
|
13 THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY |
|
14 EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. |
|
15 |
|
16 Permission is hereby granted to use or copy this program for any |
|
17 purpose, provided the above notices are retained on all copies. |
|
18 Permission to modify the code and to distribute modified code is |
|
19 granted, provided the above notices are retained, and a notice that |
|
20 the code was modified is included with the above copyright notice. |
|
21 */ |
|
22 |
|
23 #include <math.h> |
|
24 #include "dsp_defs.h" |
|
25 #include "util.h" |
|
26 |
|
27 void |
|
28 dCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, |
|
29 double *nzval, int *rowind, int *colptr, |
|
30 Stype_t stype, Dtype_t dtype, Mtype_t mtype) |
|
31 { |
|
32 NCformat *Astore; |
|
33 |
|
34 A->Stype = stype; |
|
35 A->Dtype = dtype; |
|
36 A->Mtype = mtype; |
|
37 A->nrow = m; |
|
38 A->ncol = n; |
|
39 A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) ); |
|
40 if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store"); |
|
41 Astore = A->Store; |
|
42 Astore->nnz = nnz; |
|
43 Astore->nzval = nzval; |
|
44 Astore->rowind = rowind; |
|
45 Astore->colptr = colptr; |
|
46 } |
|
47 |
|
48 void |
|
49 dCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz, |
|
50 double *nzval, int *colind, int *rowptr, |
|
51 Stype_t stype, Dtype_t dtype, Mtype_t mtype) |
|
52 { |
|
53 NRformat *Astore; |
|
54 |
|
55 A->Stype = stype; |
|
56 A->Dtype = dtype; |
|
57 A->Mtype = mtype; |
|
58 A->nrow = m; |
|
59 A->ncol = n; |
|
60 A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) ); |
|
61 if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store"); |
|
62 Astore = A->Store; |
|
63 Astore->nnz = nnz; |
|
64 Astore->nzval = nzval; |
|
65 Astore->colind = colind; |
|
66 Astore->rowptr = rowptr; |
|
67 } |
|
68 |
|
69 /* Copy matrix A into matrix B. */ |
|
70 void |
|
71 dCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B) |
|
72 { |
|
73 NCformat *Astore, *Bstore; |
|
74 int ncol, nnz, i; |
|
75 |
|
76 B->Stype = A->Stype; |
|
77 B->Dtype = A->Dtype; |
|
78 B->Mtype = A->Mtype; |
|
79 B->nrow = A->nrow;; |
|
80 B->ncol = ncol = A->ncol; |
|
81 Astore = (NCformat *) A->Store; |
|
82 Bstore = (NCformat *) B->Store; |
|
83 Bstore->nnz = nnz = Astore->nnz; |
|
84 for (i = 0; i < nnz; ++i) |
|
85 ((double *)Bstore->nzval)[i] = ((double *)Astore->nzval)[i]; |
|
86 for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i]; |
|
87 for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i]; |
|
88 } |
|
89 |
|
90 |
|
91 void |
|
92 dCreate_Dense_Matrix(SuperMatrix *X, int m, int n, double *x, int ldx, |
|
93 Stype_t stype, Dtype_t dtype, Mtype_t mtype) |
|
94 { |
|
95 DNformat *Xstore; |
|
96 |
|
97 X->Stype = stype; |
|
98 X->Dtype = dtype; |
|
99 X->Mtype = mtype; |
|
100 X->nrow = m; |
|
101 X->ncol = n; |
|
102 X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) ); |
|
103 if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store"); |
|
104 Xstore = (DNformat *) X->Store; |
|
105 Xstore->lda = ldx; |
|
106 Xstore->nzval = (double *) x; |
|
107 } |
|
108 |
|
109 void |
|
110 dCopy_Dense_Matrix(int M, int N, double *X, int ldx, |
|
111 double *Y, int ldy) |
|
112 { |
|
113 /* |
|
114 * |
|
115 * Purpose |
|
116 * ======= |
|
117 * |
|
118 * Copies a two-dimensional matrix X to another matrix Y. |
|
119 */ |
|
120 int i, j; |
|
121 |
|
122 for (j = 0; j < N; ++j) |
|
123 for (i = 0; i < M; ++i) |
|
124 Y[i + j*ldy] = X[i + j*ldx]; |
|
125 } |
|
126 |
|
127 void |
|
128 dCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, |
|
129 double *nzval, int *nzval_colptr, int *rowind, |
|
130 int *rowind_colptr, int *col_to_sup, int *sup_to_col, |
|
131 Stype_t stype, Dtype_t dtype, Mtype_t mtype) |
|
132 { |
|
133 SCformat *Lstore; |
|
134 |
|
135 L->Stype = stype; |
|
136 L->Dtype = dtype; |
|
137 L->Mtype = mtype; |
|
138 L->nrow = m; |
|
139 L->ncol = n; |
|
140 L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) ); |
|
141 if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store"); |
|
142 Lstore = L->Store; |
|
143 Lstore->nnz = nnz; |
|
144 Lstore->nsuper = col_to_sup[n]; |
|
145 Lstore->nzval = nzval; |
|
146 Lstore->nzval_colptr = nzval_colptr; |
|
147 Lstore->rowind = rowind; |
|
148 Lstore->rowind_colptr = rowind_colptr; |
|
149 Lstore->col_to_sup = col_to_sup; |
|
150 Lstore->sup_to_col = sup_to_col; |
|
151 |
|
152 } |
|
153 |
|
154 |
|
155 /* |
|
156 * Convert a row compressed storage into a column compressed storage. |
|
157 */ |
|
158 void |
|
159 dCompRow_to_CompCol(int m, int n, int nnz, |
|
160 double *a, int *colind, int *rowptr, |
|
161 double **at, int **rowind, int **colptr) |
|
162 { |
|
163 register int i, j, col, relpos; |
|
164 int *marker; |
|
165 |
|
166 /* Allocate storage for another copy of the matrix. */ |
|
167 *at = (double *) doubleMalloc(nnz); |
|
168 *rowind = (int *) intMalloc(nnz); |
|
169 *colptr = (int *) intMalloc(n+1); |
|
170 marker = (int *) intCalloc(n); |
|
171 |
|
172 /* Get counts of each column of A, and set up column pointers */ |
|
173 for (i = 0; i < m; ++i) |
|
174 for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]]; |
|
175 (*colptr)[0] = 0; |
|
176 for (j = 0; j < n; ++j) { |
|
177 (*colptr)[j+1] = (*colptr)[j] + marker[j]; |
|
178 marker[j] = (*colptr)[j]; |
|
179 } |
|
180 |
|
181 /* Transfer the matrix into the compressed column storage. */ |
|
182 for (i = 0; i < m; ++i) { |
|
183 for (j = rowptr[i]; j < rowptr[i+1]; ++j) { |
|
184 col = colind[j]; |
|
185 relpos = marker[col]; |
|
186 (*rowind)[relpos] = i; |
|
187 (*at)[relpos] = a[j]; |
|
188 ++marker[col]; |
|
189 } |
|
190 } |
|
191 |
|
192 SUPERLU_FREE(marker); |
|
193 } |
|
194 |
|
195 |
|
196 void |
|
197 dPrint_CompCol_Matrix(char *what, SuperMatrix *A) |
|
198 { |
|
199 NCformat *Astore; |
|
200 register int i,n; |
|
201 double *dp; |
|
202 |
|
203 printf("\nCompCol matrix %s:\n", what); |
|
204 printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); |
|
205 n = A->ncol; |
|
206 Astore = (NCformat *) A->Store; |
|
207 dp = (double *) Astore->nzval; |
|
208 printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz); |
|
209 printf("nzval: "); |
|
210 for (i = 0; i < Astore->colptr[n]; ++i) printf("%f ", dp[i]); |
|
211 printf("\nrowind: "); |
|
212 for (i = 0; i < Astore->colptr[n]; ++i) printf("%d ", Astore->rowind[i]); |
|
213 printf("\ncolptr: "); |
|
214 for (i = 0; i <= n; ++i) printf("%d ", Astore->colptr[i]); |
|
215 printf("\n"); |
|
216 fflush(stdout); |
|
217 } |
|
218 |
|
219 void |
|
220 dPrint_SuperNode_Matrix(char *what, SuperMatrix *A) |
|
221 { |
|
222 SCformat *Astore; |
|
223 register int i,n; |
|
224 double *dp; |
|
225 |
|
226 printf("\nSuperNode matrix %s:\n", what); |
|
227 printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); |
|
228 n = A->ncol; |
|
229 Astore = (SCformat *) A->Store; |
|
230 dp = (double *) Astore->nzval; |
|
231 printf("nrow %d, ncol %d, nnz %d, nsuper %d\n", |
|
232 A->nrow,A->ncol,Astore->nnz,Astore->nsuper); |
|
233 printf("nzval: "); |
|
234 for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f ", dp[i]); |
|
235 printf("\nnzval_colptr: "); |
|
236 for (i = 0; i <= n; ++i) printf("%d ", Astore->nzval_colptr[i]); |
|
237 printf("\nrowind: "); |
|
238 for (i = 0; i < Astore->rowind_colptr[n]; ++i) |
|
239 printf("%d ", Astore->rowind[i]); |
|
240 printf("\nrowind_colptr: "); |
|
241 for (i = 0; i <= n; ++i) printf("%d ", Astore->rowind_colptr[i]); |
|
242 printf("\ncol_to_sup: "); |
|
243 for (i = 0; i < n; ++i) printf("%d ", Astore->col_to_sup[i]); |
|
244 printf("\nsup_to_col: "); |
|
245 for (i = 0; i <= Astore->nsuper+1; ++i) |
|
246 printf("%d ", Astore->sup_to_col[i]); |
|
247 printf("\n"); |
|
248 fflush(stdout); |
|
249 } |
|
250 |
|
251 void |
|
252 dPrint_Dense_Matrix(char *what, SuperMatrix *A) |
|
253 { |
|
254 DNformat *Astore; |
|
255 register int i; |
|
256 double *dp; |
|
257 |
|
258 printf("\nDense matrix %s:\n", what); |
|
259 printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); |
|
260 Astore = (DNformat *) A->Store; |
|
261 dp = (double *) Astore->nzval; |
|
262 printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda); |
|
263 printf("\nnzval: "); |
|
264 for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]); |
|
265 printf("\n"); |
|
266 fflush(stdout); |
|
267 } |
|
268 |
|
269 /* |
|
270 * Diagnostic print of column "jcol" in the U/L factor. |
|
271 */ |
|
272 void |
|
273 dprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu) |
|
274 { |
|
275 int i, k, fsupc; |
|
276 int *xsup, *supno; |
|
277 int *xlsub, *lsub; |
|
278 double *lusup; |
|
279 int *xlusup; |
|
280 double *ucol; |
|
281 int *usub, *xusub; |
|
282 |
|
283 xsup = Glu->xsup; |
|
284 supno = Glu->supno; |
|
285 lsub = Glu->lsub; |
|
286 xlsub = Glu->xlsub; |
|
287 lusup = Glu->lusup; |
|
288 xlusup = Glu->xlusup; |
|
289 ucol = Glu->ucol; |
|
290 usub = Glu->usub; |
|
291 xusub = Glu->xusub; |
|
292 |
|
293 printf("%s", msg); |
|
294 printf("col %d: pivrow %d, supno %d, xprune %d\n", |
|
295 jcol, pivrow, supno[jcol], xprune[jcol]); |
|
296 |
|
297 printf("\tU-col:\n"); |
|
298 for (i = xusub[jcol]; i < xusub[jcol+1]; i++) |
|
299 printf("\t%d%10.4f\n", usub[i], ucol[i]); |
|
300 printf("\tL-col in rectangular snode:\n"); |
|
301 fsupc = xsup[supno[jcol]]; /* first col of the snode */ |
|
302 i = xlsub[fsupc]; |
|
303 k = xlusup[jcol]; |
|
304 while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) { |
|
305 printf("\t%d\t%10.4f\n", lsub[i], lusup[k]); |
|
306 i++; k++; |
|
307 } |
|
308 fflush(stdout); |
|
309 } |
|
310 |
|
311 |
|
312 /* |
|
313 * Check whether tempv[] == 0. This should be true before and after |
|
314 * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". |
|
315 */ |
|
316 void dcheck_tempv(int n, double *tempv) |
|
317 { |
|
318 int i; |
|
319 |
|
320 for (i = 0; i < n; i++) { |
|
321 if (tempv[i] != 0.0) |
|
322 { |
|
323 fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]); |
|
324 ABORT("dcheck_tempv"); |
|
325 } |
|
326 } |
|
327 } |
|
328 |
|
329 |
|
330 void |
|
331 dGenXtrue(int n, int nrhs, double *x, int ldx) |
|
332 { |
|
333 int i, j; |
|
334 for (j = 0; j < nrhs; ++j) |
|
335 for (i = 0; i < n; ++i) { |
|
336 x[i + j*ldx] = 1.0;/* + (double)(i+1.)/n;*/ |
|
337 } |
|
338 } |
|
339 |
|
340 /* |
|
341 * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's |
|
342 */ |
|
343 void |
|
344 dFillRHS(char *trans, int nrhs, double *x, int ldx, |
|
345 SuperMatrix *A, SuperMatrix *B) |
|
346 { |
|
347 NCformat *Astore; |
|
348 double *Aval; |
|
349 DNformat *Bstore; |
|
350 double *rhs; |
|
351 double one = 1.0; |
|
352 double zero = 0.0; |
|
353 int ldc; |
|
354 |
|
355 Astore = A->Store; |
|
356 Aval = (double *) Astore->nzval; |
|
357 Bstore = B->Store; |
|
358 rhs = Bstore->nzval; |
|
359 ldc = Bstore->lda; |
|
360 |
|
361 sp_dgemm(trans, "N", A->nrow, nrhs, A->ncol, one, A, |
|
362 x, ldx, zero, rhs, ldc); |
|
363 |
|
364 } |
|
365 |
|
366 /* |
|
367 * Fills a double precision array with a given value. |
|
368 */ |
|
369 void |
|
370 dfill(double *a, int alen, double dval) |
|
371 { |
|
372 register int i; |
|
373 for (i = 0; i < alen; i++) a[i] = dval; |
|
374 } |
|
375 |
|
376 |
|
377 |
|
378 /* |
|
379 * Check the inf-norm of the error vector |
|
380 */ |
|
381 void dinf_norm_error(int nrhs, SuperMatrix *X, double *xtrue) |
|
382 { |
|
383 DNformat *Xstore; |
|
384 double err, xnorm; |
|
385 double *Xmat, *soln_work; |
|
386 int i, j; |
|
387 |
|
388 Xstore = X->Store; |
|
389 Xmat = Xstore->nzval; |
|
390 |
|
391 for (j = 0; j < nrhs; j++) { |
|
392 soln_work = &Xmat[j*Xstore->lda]; |
|
393 err = xnorm = 0.0; |
|
394 for (i = 0; i < X->nrow; i++) { |
|
395 err = MAX(err, fabs(soln_work[i] - xtrue[i])); |
|
396 xnorm = MAX(xnorm, fabs(soln_work[i])); |
|
397 } |
|
398 err = err / xnorm; |
|
399 printf("||X - Xtrue||/||X|| = %e\n", err); |
|
400 } |
|
401 } |
|
402 |
|
403 |
|
404 |
|
405 /* Print performance of the code. */ |
|
406 void |
|
407 dPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage, |
|
408 double rpg, double rcond, double *ferr, |
|
409 double *berr, char *equed) |
|
410 { |
|
411 SCformat *Lstore; |
|
412 NCformat *Ustore; |
|
413 extern SuperLUStat_t SuperLUStat; |
|
414 double *utime; |
|
415 flops_t *ops; |
|
416 |
|
417 utime = SuperLUStat.utime; |
|
418 ops = SuperLUStat.ops; |
|
419 |
|
420 if ( utime[FACT] != 0. ) |
|
421 printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT], |
|
422 ops[FACT]*1e-6/utime[FACT]); |
|
423 printf("Identify relaxed snodes = %8.2f\n", utime[RELAX]); |
|
424 if ( utime[SOLVE] != 0. ) |
|
425 printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE], |
|
426 ops[SOLVE]*1e-6/utime[SOLVE]); |
|
427 |
|
428 Lstore = (SCformat *) L->Store; |
|
429 Ustore = (NCformat *) U->Store; |
|
430 printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz); |
|
431 printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz); |
|
432 printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); |
|
433 |
|
434 printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", |
|
435 mem_usage->for_lu/1e6, mem_usage->total_needed/1e6, |
|
436 mem_usage->expansions); |
|
437 |
|
438 printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n"); |
|
439 printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n", |
|
440 utime[FACT], ops[FACT]*1e-6/utime[FACT], |
|
441 utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE], |
|
442 utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]); |
|
443 |
|
444 printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n"); |
|
445 printf("NUM:\t%e\t%e\t%e\t%e\t%s\n", |
|
446 rpg, rcond, ferr[0], berr[0], equed); |
|
447 |
|
448 } |
|
449 |
|
450 |
|
451 |
|
452 |
|
453 print_double_vec(char *what, int n, double *vec) |
|
454 { |
|
455 int i; |
|
456 printf("%s: n %d\n", what, n); |
|
457 for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]); |
|
458 return 0; |
|
459 } |
|
460 |