0
|
1 |
|
2 |
|
3 /* |
|
4 * -- SuperLU routine (version 2.0) -- |
|
5 * Univ. of California Berkeley, Xerox Palo Alto Research Center, |
|
6 * and Lawrence Berkeley National Lab. |
|
7 * November 15, 1997 |
|
8 * |
|
9 */ |
|
10 /* |
|
11 Copyright (c) 1994 by Xerox Corporation. All rights reserved. |
|
12 |
|
13 THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY |
|
14 EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. |
|
15 |
|
16 Permission is hereby granted to use or copy this program for any |
|
17 purpose, provided the above notices are retained on all copies. |
|
18 Permission to modify the code and to distribute modified code is |
|
19 granted, provided the above notices are retained, and a notice that |
|
20 the code was modified is included with the above copyright notice. |
|
21 */ |
|
22 |
|
23 #include "zsp_defs.h" |
|
24 #include "util.h" |
|
25 |
|
26 /* What type of supernodes we want */ |
|
27 #define T2_SUPER |
|
28 |
|
29 int |
|
30 zcolumn_dfs( |
|
31 const int m, /* in - number of rows in the matrix */ |
|
32 const int jcol, /* in */ |
|
33 int *perm_r, /* in */ |
|
34 int *nseg, /* modified - with new segments appended */ |
|
35 int *lsub_col, /* in - defines the RHS vector to start the dfs */ |
|
36 int *segrep, /* modified - with new segments appended */ |
|
37 int *repfnz, /* modified */ |
|
38 int *xprune, /* modified */ |
|
39 int *marker, /* modified */ |
|
40 int *parent, /* working array */ |
|
41 int *xplore, /* working array */ |
|
42 GlobalLU_t *Glu /* modified */ |
|
43 ) |
|
44 { |
|
45 /* |
|
46 * Purpose |
|
47 * ======= |
|
48 * "column_dfs" performs a symbolic factorization on column jcol, and |
|
49 * decide the supernode boundary. |
|
50 * |
|
51 * This routine does not use numeric values, but only use the RHS |
|
52 * row indices to start the dfs. |
|
53 * |
|
54 * A supernode representative is the last column of a supernode. |
|
55 * The nonzeros in U[*,j] are segments that end at supernodal |
|
56 * representatives. The routine returns a list of such supernodal |
|
57 * representatives in topological order of the dfs that generates them. |
|
58 * The location of the first nonzero in each such supernodal segment |
|
59 * (supernodal entry location) is also returned. |
|
60 * |
|
61 * Local parameters |
|
62 * ================ |
|
63 * nseg: no of segments in current U[*,j] |
|
64 * jsuper: jsuper=NO if column j does not belong to the same |
|
65 * supernode as j-1. Otherwise, jsuper=nsuper. |
|
66 * |
|
67 * marker2: A-row --> A-row/col (0/1) |
|
68 * repfnz: SuperA-col --> PA-row |
|
69 * parent: SuperA-col --> SuperA-col |
|
70 * xplore: SuperA-col --> index to L-structure |
|
71 * |
|
72 * Return value |
|
73 * ============ |
|
74 * 0 success; |
|
75 * > 0 number of bytes allocated when run out of space. |
|
76 * |
|
77 */ |
|
78 int jcolp1, jcolm1, jsuper, nsuper, nextl; |
|
79 int k, krep, krow, kmark, kperm; |
|
80 int *marker2; /* Used for small panel LU */ |
|
81 int fsupc; /* First column of a snode */ |
|
82 int myfnz; /* First nonz column of a U-segment */ |
|
83 int chperm, chmark, chrep, kchild; |
|
84 int xdfs, maxdfs, kpar, oldrep; |
|
85 int jptr, jm1ptr; |
|
86 int ito, ifrom, istop; /* Used to compress row subscripts */ |
|
87 int mem_error; |
|
88 int *xsup, *supno, *lsub, *xlsub; |
|
89 int nzlmax; |
|
90 static int first = 1, maxsuper; |
|
91 |
|
92 xsup = Glu->xsup; |
|
93 supno = Glu->supno; |
|
94 lsub = Glu->lsub; |
|
95 xlsub = Glu->xlsub; |
|
96 nzlmax = Glu->nzlmax; |
|
97 |
|
98 if ( first ) { |
|
99 maxsuper = sp_ienv(3); |
|
100 first = 0; |
|
101 } |
|
102 jcolp1 = jcol + 1; |
|
103 jcolm1 = jcol - 1; |
|
104 nsuper = supno[jcol]; |
|
105 jsuper = nsuper; |
|
106 nextl = xlsub[jcol]; |
|
107 marker2 = &marker[2*m]; |
|
108 |
|
109 |
|
110 /* For each nonzero in A[*,jcol] do dfs */ |
|
111 for (k = 0; lsub_col[k] != EMPTY; k++) { |
|
112 |
|
113 krow = lsub_col[k]; |
|
114 lsub_col[k] = EMPTY; |
|
115 kmark = marker2[krow]; |
|
116 |
|
117 /* krow was visited before, go to the next nonz */ |
|
118 if ( kmark == jcol ) continue; |
|
119 |
|
120 /* For each unmarked nbr krow of jcol |
|
121 * krow is in L: place it in structure of L[*,jcol] |
|
122 */ |
|
123 marker2[krow] = jcol; |
|
124 kperm = perm_r[krow]; |
|
125 |
|
126 if ( kperm == EMPTY ) { |
|
127 lsub[nextl++] = krow; /* krow is indexed into A */ |
|
128 if ( nextl >= nzlmax ) { |
|
129 if ( mem_error = zLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) ) |
|
130 return (mem_error); |
|
131 lsub = Glu->lsub; |
|
132 } |
|
133 if ( kmark != jcolm1 ) jsuper = NO; /* Row index subset testing */ |
|
134 } else { |
|
135 /* krow is in U: if its supernode-rep krep |
|
136 * has been explored, update repfnz[*] |
|
137 */ |
|
138 krep = xsup[supno[kperm]+1] - 1; |
|
139 myfnz = repfnz[krep]; |
|
140 |
|
141 if ( myfnz != EMPTY ) { /* Visited before */ |
|
142 if ( myfnz > kperm ) repfnz[krep] = kperm; |
|
143 /* continue; */ |
|
144 } |
|
145 else { |
|
146 /* Otherwise, perform dfs starting at krep */ |
|
147 oldrep = EMPTY; |
|
148 parent[krep] = oldrep; |
|
149 repfnz[krep] = kperm; |
|
150 xdfs = xlsub[krep]; |
|
151 maxdfs = xprune[krep]; |
|
152 |
|
153 do { |
|
154 /* |
|
155 * For each unmarked kchild of krep |
|
156 */ |
|
157 while ( xdfs < maxdfs ) { |
|
158 |
|
159 kchild = lsub[xdfs]; |
|
160 xdfs++; |
|
161 chmark = marker2[kchild]; |
|
162 |
|
163 if ( chmark != jcol ) { /* Not reached yet */ |
|
164 marker2[kchild] = jcol; |
|
165 chperm = perm_r[kchild]; |
|
166 |
|
167 /* Case kchild is in L: place it in L[*,k] */ |
|
168 if ( chperm == EMPTY ) { |
|
169 lsub[nextl++] = kchild; |
|
170 if ( nextl >= nzlmax ) { |
|
171 if ( mem_error = |
|
172 zLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu) ) |
|
173 return (mem_error); |
|
174 lsub = Glu->lsub; |
|
175 } |
|
176 if ( chmark != jcolm1 ) jsuper = NO; |
|
177 } else { |
|
178 /* Case kchild is in U: |
|
179 * chrep = its supernode-rep. If its rep has |
|
180 * been explored, update its repfnz[*] |
|
181 */ |
|
182 chrep = xsup[supno[chperm]+1] - 1; |
|
183 myfnz = repfnz[chrep]; |
|
184 if ( myfnz != EMPTY ) { /* Visited before */ |
|
185 if ( myfnz > chperm ) |
|
186 repfnz[chrep] = chperm; |
|
187 } else { |
|
188 /* Continue dfs at super-rep of kchild */ |
|
189 xplore[krep] = xdfs; |
|
190 oldrep = krep; |
|
191 krep = chrep; /* Go deeper down G(L^t) */ |
|
192 parent[krep] = oldrep; |
|
193 repfnz[krep] = chperm; |
|
194 xdfs = xlsub[krep]; |
|
195 maxdfs = xprune[krep]; |
|
196 } /* else */ |
|
197 |
|
198 } /* else */ |
|
199 |
|
200 } /* if */ |
|
201 |
|
202 } /* while */ |
|
203 |
|
204 /* krow has no more unexplored nbrs; |
|
205 * place supernode-rep krep in postorder DFS. |
|
206 * backtrack dfs to its parent |
|
207 */ |
|
208 segrep[*nseg] = krep; |
|
209 ++(*nseg); |
|
210 kpar = parent[krep]; /* Pop from stack, mimic recursion */ |
|
211 if ( kpar == EMPTY ) break; /* dfs done */ |
|
212 krep = kpar; |
|
213 xdfs = xplore[krep]; |
|
214 maxdfs = xprune[krep]; |
|
215 |
|
216 } while ( kpar != EMPTY ); /* Until empty stack */ |
|
217 |
|
218 } /* else */ |
|
219 |
|
220 } /* else */ |
|
221 |
|
222 } /* for each nonzero ... */ |
|
223 |
|
224 /* Check to see if j belongs in the same supernode as j-1 */ |
|
225 if ( jcol == 0 ) { /* Do nothing for column 0 */ |
|
226 nsuper = supno[0] = 0; |
|
227 } else { |
|
228 fsupc = xsup[nsuper]; |
|
229 jptr = xlsub[jcol]; /* Not compressed yet */ |
|
230 jm1ptr = xlsub[jcolm1]; |
|
231 |
|
232 #ifdef T2_SUPER |
|
233 if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = NO; |
|
234 #endif |
|
235 /* Make sure the number of columns in a supernode doesn't |
|
236 exceed threshold. */ |
|
237 if ( jcol - fsupc >= maxsuper ) jsuper = NO; |
|
238 |
|
239 /* If jcol starts a new supernode, reclaim storage space in |
|
240 * lsub from the previous supernode. Note we only store |
|
241 * the subscript set of the first and last columns of |
|
242 * a supernode. (first for num values, last for pruning) |
|
243 */ |
|
244 if ( jsuper == NO ) { /* starts a new supernode */ |
|
245 if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */ |
|
246 #ifdef CHK_COMPRESS |
|
247 printf(" Compress lsub[] at super %d-%d\n", fsupc, jcolm1); |
|
248 #endif |
|
249 ito = xlsub[fsupc+1]; |
|
250 xlsub[jcolm1] = ito; |
|
251 istop = ito + jptr - jm1ptr; |
|
252 xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */ |
|
253 xlsub[jcol] = istop; |
|
254 for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito) |
|
255 lsub[ito] = lsub[ifrom]; |
|
256 nextl = ito; /* = istop + length(jcol) */ |
|
257 } |
|
258 nsuper++; |
|
259 supno[jcol] = nsuper; |
|
260 } /* if a new supernode */ |
|
261 |
|
262 } /* else: jcol > 0 */ |
|
263 |
|
264 /* Tidy up the pointers before exit */ |
|
265 xsup[nsuper+1] = jcolp1; |
|
266 supno[jcolp1] = nsuper; |
|
267 xprune[jcol] = nextl; /* Initialize upper bound for pruning */ |
|
268 xlsub[jcolp1] = nextl; |
|
269 |
|
270 return 0; |
|
271 } |