xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision a2596bdbc30ea25b67c8aa9d27518270ab3efaa7)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: aijfact.c,v 1.88 1997/09/18 16:06:33 balay Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/dot.h"
8 
9 #undef __FUNC__
10 #define __FUNC__ "MatOrder_Flow_SeqAIJ"
11 int MatOrder_Flow_SeqAIJ(Mat mat,MatReordering type,IS *irow,IS *icol)
12 {
13   SETERRQ(PETSC_ERR_SUP,0,"Code not written");
14 }
15 
16 /*
17     Factorization code for AIJ format.
18 */
19 #undef __FUNC__
20 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ"
21 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B)
22 {
23   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
24   IS         isicol;
25   int        *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j;
26   int        *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift;
27   int        *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im;
28 
29   PetscValidHeaderSpecific(isrow,IS_COOKIE);
30   PetscValidHeaderSpecific(iscol,IS_COOKIE);
31 
32   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
33   ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic);
34 
35   /* get new row pointers */
36   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
37   ainew[0] = -shift;
38   /* don't know how many column pointers are needed so estimate */
39   jmax = (int) (f*ai[n]+(!shift));
40   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
41   /* fill is a linked list of nonzeros in active row */
42   fill = (int *) PetscMalloc( (2*n+1)*sizeof(int)); CHKPTRQ(fill);
43   im = fill + n + 1;
44   /* idnew is location of diagonal in factor */
45   idnew = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(idnew);
46   idnew[0] = -shift;
47 
48   for ( i=0; i<n; i++ ) {
49     /* first copy previous fill into linked list */
50     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
51     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
52     ajtmp   = aj + ai[r[i]] + shift;
53     fill[n] = n;
54     while (nz--) {
55       fm  = n;
56       idx = ic[*ajtmp++ + shift];
57       do {
58         m  = fm;
59         fm = fill[m];
60       } while (fm < idx);
61       fill[m]   = idx;
62       fill[idx] = fm;
63     }
64     row = fill[n];
65     while ( row < i ) {
66       ajtmp = ajnew + idnew[row] + (!shift);
67       nzbd  = 1 + idnew[row] - ainew[row];
68       nz    = im[row] - nzbd;
69       fm    = row;
70       while (nz-- > 0) {
71         idx = *ajtmp++ + shift;
72         nzbd++;
73         if (idx == i) im[row] = nzbd;
74         do {
75           m  = fm;
76           fm = fill[m];
77         } while (fm < idx);
78         if (fm != idx) {
79           fill[m]   = idx;
80           fill[idx] = fm;
81           fm        = idx;
82           nnz++;
83         }
84       }
85       row = fill[row];
86     }
87     /* copy new filled row into permanent storage */
88     ainew[i+1] = ainew[i] + nnz;
89     if (ainew[i+1] > jmax) {
90       /* allocate a longer ajnew */
91       int maxadd;
92       maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n);
93       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
94       jmax += maxadd;
95       ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp);
96       PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));
97       PetscFree(ajnew);
98       ajnew = ajtmp;
99       realloc++; /* count how many times we realloc */
100     }
101     ajtmp = ajnew + ainew[i] + shift;
102     fm    = fill[n];
103     nzi   = 0;
104     im[i] = nnz;
105     while (nnz--) {
106       if (fm < i) nzi++;
107       *ajtmp++ = fm - shift;
108       fm       = fill[fm];
109     }
110     idnew[i] = ainew[i] + nzi;
111   }
112   if (ai[n] != 0) {
113     double af = ((double)ainew[n])/((double)ai[n]);
114     PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
115              realloc,f,af);
116     PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
117     PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
118     PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
119   } else {
120     PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
121   }
122 
123   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
124   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
125 
126   PetscFree(fill);
127 
128   /* put together the new matrix */
129   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B); CHKERRQ(ierr);
130   PLogObjectParent(*B,isicol);
131   ierr = ISDestroy(isicol); CHKERRQ(ierr);
132   b = (Mat_SeqAIJ *) (*B)->data;
133   PetscFree(b->imax);
134   b->singlemalloc = 0;
135   /* the next line frees the default space generated by the Create() */
136   PetscFree(b->a); PetscFree(b->ilen);
137   b->a          = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a);
138   b->j          = ajnew;
139   b->i          = ainew;
140   b->diag       = idnew;
141   b->ilen       = 0;
142   b->imax       = 0;
143   b->row        = isrow;
144   b->col        = iscol;
145   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
146   /* In b structure:  Free imax, ilen, old a, old j.
147      Allocate idnew, solve_work, new a, new j */
148   PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar)));
149   b->maxnz = b->nz = ainew[n] + shift;
150 
151   (*B)->info.factor_mallocs    = realloc;
152   (*B)->info.fill_ratio_given  = f;
153   if (ai[i] != 0) {
154     (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[i]);
155   } else {
156     (*B)->info.fill_ratio_needed = 0.0;
157   }
158 
159   return 0;
160 }
161 /* ----------------------------------------------------------- */
162 int Mat_AIJ_CheckInode(Mat);
163 
164 #undef __FUNC__
165 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ"
166 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
167 {
168   Mat        C = *B;
169   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data;
170   IS         iscol = b->col, isrow = b->row, isicol;
171   int        *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j;
172   int        *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift;
173   int        *diag_offset = b->diag,diag,k;
174   int        preserve_row_sums = (int) a->ilu_preserve_row_sums;
175   Scalar     *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0;
176   double     ssum;
177   /* These declarations are for optimizations.  They reduce the number of
178      memory references that are made by locally storing information; the
179      word "register" used here with pointers can be viewed as "private" or
180      "known only to me"
181    */
182   register Scalar *pv, *rtmps,*u_values;
183   register int    *pj;
184 
185   ierr  = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
186   PLogObjectParent(*B,isicol);
187   ierr  = ISGetIndices(isrow,&r); CHKERRQ(ierr);
188   ierr  = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
189   rtmp  = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) ); CHKPTRQ(rtmp);
190   PetscMemzero(rtmp,(n+1)*sizeof(Scalar));
191   rtmps = rtmp + shift; ics = ic + shift;
192 
193   /* precalcuate row sums */
194   if (preserve_row_sums) {
195     rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) ); CHKPTRQ(rowsums);
196     for ( i=0; i<n; i++ ) {
197       nz  = a->i[r[i]+1] - a->i[r[i]];
198       v   = a->a + a->i[r[i]] + shift;
199       sum = 0.0;
200       for ( j=0; j<nz; j++ ) sum += v[j];
201       rowsums[i] = sum;
202     }
203   }
204 
205   for ( i=0; i<n; i++ ) {
206     nz    = ai[i+1] - ai[i];
207     ajtmp = aj + ai[i] + shift;
208     for  ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0;
209 
210     /* load in initial (unfactored row) */
211     nz       = a->i[r[i]+1] - a->i[r[i]];
212     ajtmpold = a->j + a->i[r[i]] + shift;
213     v        = a->a + a->i[r[i]] + shift;
214     for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] =  v[j];
215 
216     row = *ajtmp++ + shift;
217       while  (row < i ) {
218       pc = rtmp + row;
219       if (*pc != 0.0) {
220         pv         = b->a + diag_offset[row] + shift;
221         pj         = b->j + diag_offset[row] + (!shift);
222         multiplier = *pc / *pv++;
223         *pc        = multiplier;
224         nz         = ai[row+1] - diag_offset[row] - 1;
225         for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
226         PLogFlops(2*nz);
227       }
228       row = *ajtmp++ + shift;
229     }
230     /* finished row so stick it into b->a */
231     pv = b->a + ai[i] + shift;
232     pj = b->j + ai[i] + shift;
233     nz = ai[i+1] - ai[i];
234     for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];}
235     diag = diag_offset[i] - ai[i];
236     /*
237           Possibly adjust diagonal entry on current row to force
238         LU matrix to have same row sum as initial matrix.
239     */
240     if (pv[diag] == 0.0) {
241       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot");
242     }
243     if (preserve_row_sums) {
244       pj  = b->j + ai[i] + shift;
245       sum = rowsums[i];
246       for ( j=0; j<diag; j++ ) {
247         u_values  = b->a + diag_offset[pj[j]] + shift;
248         nz        = ai[pj[j]+1] - diag_offset[pj[j]];
249         inner_sum = 0.0;
250         for ( k=0; k<nz; k++ ) {
251           inner_sum += u_values[k];
252         }
253         sum -= pv[j]*inner_sum;
254 
255       }
256       nz       = ai[i+1] - diag_offset[i] - 1;
257       u_values = b->a + diag_offset[i] + 1 + shift;
258       for ( k=0; k<nz; k++ ) {
259         sum -= u_values[k];
260       }
261       ssum = PetscAbsScalar(sum/pv[diag]);
262       if (ssum < 1000. && ssum > .001) pv[diag] = sum;
263     }
264     /* check pivot entry for current row */
265   }
266 
267   /* invert diagonal entries for simplier triangular solves */
268   for ( i=0; i<n; i++ ) {
269     b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
270   }
271 
272   if (preserve_row_sums) PetscFree(rowsums);
273   PetscFree(rtmp);
274   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
275   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
276   ierr = ISDestroy(isicol); CHKERRQ(ierr);
277   C->factor = FACTOR_LU;
278   ierr = Mat_AIJ_CheckInode(C); CHKERRQ(ierr);
279   C->assembled = PETSC_TRUE;
280   PLogFlops(b->n);
281   return 0;
282 }
283 /* ----------------------------------------------------------- */
284 #undef __FUNC__
285 #define __FUNC__ "MatLUFactor_SeqAIJ"
286 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f)
287 {
288   Mat_SeqAIJ *mat = (Mat_SeqAIJ *) A->data;
289   int        ierr;
290   Mat        C;
291 
292   ierr = MatLUFactorSymbolic(A,row,col,f,&C); CHKERRQ(ierr);
293   ierr = MatLUFactorNumeric(A,&C); CHKERRQ(ierr);
294 
295   /* free all the data structures from mat */
296   PetscFree(mat->a);
297   if (!mat->singlemalloc) {PetscFree(mat->i); PetscFree(mat->j);}
298   if (mat->diag) PetscFree(mat->diag);
299   if (mat->ilen) PetscFree(mat->ilen);
300   if (mat->imax) PetscFree(mat->imax);
301   if (mat->solve_work) PetscFree(mat->solve_work);
302   if (mat->inode.size) PetscFree(mat->inode.size);
303   PetscFree(mat);
304 
305   PetscMemcpy(A,C,sizeof(struct _p_Mat));
306   PetscHeaderDestroy(C);
307   return 0;
308 }
309 /* ----------------------------------------------------------- */
310 #undef __FUNC__
311 #define __FUNC__ "MatSolve_SeqAIJ"
312 int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx)
313 {
314   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
315   IS         iscol = a->col, isrow = a->row;
316   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
317   int        nz,shift = a->indexshift,*rout,*cout;
318   Scalar     *x,*b,*tmp, *tmps, *aa = a->a, sum, *v;
319 
320   if (!n) return 0;
321 
322   VecGetArray_Fast(bb,b);
323   VecGetArray_Fast(xx,x);
324   tmp  = a->solve_work;
325 
326   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
327   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
328 
329   /* forward solve the lower triangular */
330   tmp[0] = b[*r++];
331   tmps   = tmp + shift;
332   for ( i=1; i<n; i++ ) {
333     v   = aa + ai[i] + shift;
334     vi  = aj + ai[i] + shift;
335     nz  = a->diag[i] - ai[i];
336     sum = b[*r++];
337     while (nz--) sum -= *v++ * tmps[*vi++];
338     tmp[i] = sum;
339   }
340 
341   /* backward solve the upper triangular */
342   for ( i=n-1; i>=0; i-- ){
343     v   = aa + a->diag[i] + (!shift);
344     vi  = aj + a->diag[i] + (!shift);
345     nz  = ai[i+1] - a->diag[i] - 1;
346     sum = tmp[i];
347     while (nz--) sum -= *v++ * tmps[*vi++];
348     x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
349   }
350 
351   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
352   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
353   PLogFlops(2*a->nz - a->n);
354   return 0;
355 }
356 
357 /* ----------------------------------------------------------- */
358 #undef __FUNC__
359 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering"
360 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx)
361 {
362   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
363   int        i,  n = a->m, *vi, *ai = a->i, *aj = a->j,nz, *adiag = a->diag;
364   int        ai_i, adiag_i;
365   Scalar     *x,*b, *aa = a->a, sum, *v;
366 
367   if (!n) return 0;
368   if (a->indexshift) {
369      return MatSolve_SeqAIJ(A,bb,xx);
370   }
371 
372   VecGetArray_Fast(bb,b);
373   VecGetArray_Fast(xx,x);
374 
375 #if defined(USE_FORTRAN_KERNELS)
376   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
377 #else
378   /* forward solve the lower triangular */
379   x[0] = b[0];
380   for ( i=1; i<n; i++ ) {
381     ai_i = ai[i];
382     v    = aa + ai_i;
383     vi   = aj + ai_i;
384     nz   = adiag[i] - ai_i;
385     sum  = b[i];
386     while (nz--) sum -= *v++ * x[*vi++];
387     x[i] = sum;
388   }
389 
390   /* backward solve the upper triangular */
391   for ( i=n-1; i>=0; i-- ){
392     adiag_i = adiag[i];
393     v       = aa + adiag_i + 1;
394     vi      = aj + adiag_i + 1;
395     nz      = ai[i+1] - adiag_i - 1;
396     sum     = x[i];
397     while (nz--) sum -= *v++ * x[*vi++];
398     x[i]    = sum*aa[adiag_i];
399   }
400 #endif
401   PLogFlops(2*a->nz - a->n);
402   return 0;
403 }
404 
405 #undef __FUNC__
406 #define __FUNC__ "MatSolveAdd_SeqAIJ"
407 int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx)
408 {
409   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
410   IS         iscol = a->col, isrow = a->row;
411   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
412   int        nz, shift = a->indexshift,*rout,*cout;
413   Scalar     *x,*b,*tmp, *aa = a->a, sum, *v;
414 
415   if (yy != xx) {ierr = VecCopy(yy,xx); CHKERRQ(ierr);}
416 
417   VecGetArray_Fast(bb,b);
418   VecGetArray_Fast(xx,x);
419   tmp  = a->solve_work;
420 
421   ierr = ISGetIndices(isrow,&rout); CHKERRQ(ierr); r = rout;
422   ierr = ISGetIndices(iscol,&cout); CHKERRQ(ierr); c = cout + (n-1);
423 
424   /* forward solve the lower triangular */
425   tmp[0] = b[*r++];
426   for ( i=1; i<n; i++ ) {
427     v   = aa + ai[i] + shift;
428     vi  = aj + ai[i] + shift;
429     nz  = a->diag[i] - ai[i];
430     sum = b[*r++];
431     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
432     tmp[i] = sum;
433   }
434 
435   /* backward solve the upper triangular */
436   for ( i=n-1; i>=0; i-- ){
437     v   = aa + a->diag[i] + (!shift);
438     vi  = aj + a->diag[i] + (!shift);
439     nz  = ai[i+1] - a->diag[i] - 1;
440     sum = tmp[i];
441     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
442     tmp[i] = sum*aa[a->diag[i]+shift];
443     x[*c--] += tmp[i];
444   }
445 
446   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
447   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
448   PLogFlops(2*a->nz);
449 
450   return 0;
451 }
452 /* -------------------------------------------------------------------*/
453 #undef __FUNC__
454 #define __FUNC__ "MatSolveTrans_SeqAIJ"
455 int MatSolveTrans_SeqAIJ(Mat A,Vec bb, Vec xx)
456 {
457   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
458   IS         iscol = a->col, isrow = a->row, invisrow,inviscol;
459   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
460   int        nz,shift = a->indexshift,*rout,*cout;
461   Scalar     *x,*b,*tmp, *aa = a->a, *v;
462 
463   VecGetArray_Fast(bb,b);
464   VecGetArray_Fast(xx,x);
465   tmp  = a->solve_work;
466 
467   /* invert the permutations */
468   ierr = ISInvertPermutation(isrow,&invisrow); CHKERRQ(ierr);
469   ierr = ISInvertPermutation(iscol,&inviscol); CHKERRQ(ierr);
470 
471   ierr = ISGetIndices(invisrow,&rout); CHKERRQ(ierr); r = rout;
472   ierr = ISGetIndices(inviscol,&cout); CHKERRQ(ierr); c = cout;
473 
474   /* copy the b into temp work space according to permutation */
475   for ( i=0; i<n; i++ ) tmp[c[i]] = b[i];
476 
477   /* forward solve the U^T */
478   for ( i=0; i<n; i++ ) {
479     v   = aa + a->diag[i] + shift;
480     vi  = aj + a->diag[i] + (!shift);
481     nz  = ai[i+1] - a->diag[i] - 1;
482     tmp[i] *= *v++;
483     while (nz--) {
484       tmp[*vi++ + shift] -= (*v++)*tmp[i];
485     }
486   }
487 
488   /* backward solve the L^T */
489   for ( i=n-1; i>=0; i-- ){
490     v   = aa + a->diag[i] - 1 + shift;
491     vi  = aj + a->diag[i] - 1 + shift;
492     nz  = a->diag[i] - ai[i];
493     while (nz--) {
494       tmp[*vi-- + shift] -= (*v--)*tmp[i];
495     }
496   }
497 
498   /* copy tmp into x according to permutation */
499   for ( i=0; i<n; i++ ) x[r[i]] = tmp[i];
500 
501   ierr = ISRestoreIndices(invisrow,&rout); CHKERRQ(ierr);
502   ierr = ISRestoreIndices(inviscol,&cout); CHKERRQ(ierr);
503   ierr = ISDestroy(invisrow); CHKERRQ(ierr);
504   ierr = ISDestroy(inviscol); CHKERRQ(ierr);
505 
506   PLogFlops(2*a->nz-a->n);
507   return 0;
508 }
509 
510 #undef __FUNC__
511 #define __FUNC__ "MatSolveTransAdd_SeqAIJ"
512 int MatSolveTransAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx)
513 {
514   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
515   IS         iscol = a->col, isrow = a->row, invisrow,inviscol;
516   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
517   int        nz,shift = a->indexshift, *rout, *cout;
518   Scalar     *x,*b,*tmp, *aa = a->a, *v;
519 
520   if (zz != xx) VecCopy(zz,xx);
521 
522   VecGetArray_Fast(bb,b);
523   VecGetArray_Fast(xx,x);
524   tmp = a->solve_work;
525 
526   /* invert the permutations */
527   ierr = ISInvertPermutation(isrow,&invisrow); CHKERRQ(ierr);
528   ierr = ISInvertPermutation(iscol,&inviscol); CHKERRQ(ierr);
529   ierr = ISGetIndices(invisrow,&rout); CHKERRQ(ierr); r = rout;
530   ierr = ISGetIndices(inviscol,&cout); CHKERRQ(ierr); c = cout;
531 
532   /* copy the b into temp work space according to permutation */
533   for ( i=0; i<n; i++ ) tmp[c[i]] = b[i];
534 
535   /* forward solve the U^T */
536   for ( i=0; i<n; i++ ) {
537     v   = aa + a->diag[i] + shift;
538     vi  = aj + a->diag[i] + (!shift);
539     nz  = ai[i+1] - a->diag[i] - 1;
540     tmp[i] *= *v++;
541     while (nz--) {
542       tmp[*vi++ + shift] -= (*v++)*tmp[i];
543     }
544   }
545 
546   /* backward solve the L^T */
547   for ( i=n-1; i>=0; i-- ){
548     v   = aa + a->diag[i] - 1 + shift;
549     vi  = aj + a->diag[i] - 1 + shift;
550     nz  = a->diag[i] - ai[i];
551     while (nz--) {
552       tmp[*vi-- + shift] -= (*v--)*tmp[i];
553     }
554   }
555 
556   /* copy tmp into x according to permutation */
557   for ( i=0; i<n; i++ ) x[r[i]] += tmp[i];
558 
559   ierr = ISRestoreIndices(invisrow,&rout); CHKERRQ(ierr);
560   ierr = ISRestoreIndices(inviscol,&cout); CHKERRQ(ierr);
561   ierr = ISDestroy(invisrow); CHKERRQ(ierr);
562   ierr = ISDestroy(inviscol); CHKERRQ(ierr);
563 
564   PLogFlops(2*a->nz);
565   return 0;
566 }
567 /* ----------------------------------------------------------------*/
568 
569 #undef __FUNC__
570 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ"
571 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,int levels,Mat *fact)
572 {
573   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
574   IS         isicol;
575   int        *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j;
576   int        *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
577   int        *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0;
578   int        incrlev,nnz,i,shift = a->indexshift;
579   PetscTruth col_identity, row_identity;
580 
581   /* special case that simply copies fill pattern */
582   ISIdentity(isrow,&row_identity); ISIdentity(iscol,&col_identity);
583   if (levels == 0 && row_identity && col_identity) {
584     ierr = MatConvertSameType_SeqAIJ(A,fact,DO_NOT_COPY_VALUES); CHKERRQ(ierr);
585     (*fact)->factor = FACTOR_LU;
586     b               = (Mat_SeqAIJ *) (*fact)->data;
587     if (!b->diag) {
588       ierr = MatMarkDiag_SeqAIJ(*fact); CHKERRQ(ierr);
589     }
590     b->row             = isrow;
591     b->col             = iscol;
592     b->solve_work      = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
593     (*fact)->ops.solve = MatSolve_SeqAIJ_NaturalOrdering;
594     return 0;
595   }
596 
597   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
598   ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr);
599   ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
600 
601   /* get new row pointers */
602   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
603   ainew[0] = -shift;
604   /* don't know how many column pointers are needed so estimate */
605   jmax = (int) (f*(ai[n]+!shift));
606   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
607   /* ajfill is level of fill for each fill entry */
608   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill);
609   /* fill is a linked list of nonzeros in active row */
610   fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill);
611   /* im is level for each filled value */
612   im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im);
613   /* dloc is location of diagonal in factor */
614   dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc);
615   dloc[0]  = 0;
616   for ( prow=0; prow<n; prow++ ) {
617     /* first copy previous fill into linked list */
618     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
619     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
620     xi      = aj + ai[r[prow]] + shift;
621     fill[n] = n;
622     while (nz--) {
623       fm  = n;
624       idx = ic[*xi++ + shift];
625       do {
626         m  = fm;
627         fm = fill[m];
628       } while (fm < idx);
629       fill[m]   = idx;
630       fill[idx] = fm;
631       im[idx]   = 0;
632     }
633     nzi = 0;
634     row = fill[n];
635     while ( row < prow ) {
636       incrlev = im[row] + 1;
637       nz      = dloc[row];
638       xi      = ajnew  + ainew[row] + shift + nz;
639       flev    = ajfill + ainew[row] + shift + nz + 1;
640       nnz     = ainew[row+1] - ainew[row] - nz - 1;
641       if (*xi++ + shift != row) {
642         SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot: try running with -pc_ilu_nonzeros_along_diagonal");
643       }
644       fm      = row;
645       while (nnz-- > 0) {
646         idx = *xi++ + shift;
647         if (*flev + incrlev > levels) {
648           flev++;
649           continue;
650         }
651         do {
652           m  = fm;
653           fm = fill[m];
654         } while (fm < idx);
655         if (fm != idx) {
656           im[idx]   = *flev + incrlev;
657           fill[m]   = idx;
658           fill[idx] = fm;
659           fm        = idx;
660           nzf++;
661         }
662         else {
663           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
664         }
665         flev++;
666       }
667       row = fill[row];
668       nzi++;
669     }
670     /* copy new filled row into permanent storage */
671     ainew[prow+1] = ainew[prow] + nzf;
672     if (ainew[prow+1] > jmax-shift) {
673       /* allocate a longer ajnew */
674       int maxadd;
675       maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n);
676       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
677       jmax += maxadd;
678       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
679       PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));
680       PetscFree(ajnew);
681       ajnew = xi;
682       /* allocate a longer ajfill */
683       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
684       PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));
685       PetscFree(ajfill);
686       ajfill = xi;
687       realloc++;
688     }
689     xi          = ajnew + ainew[prow] + shift;
690     flev        = ajfill + ainew[prow] + shift;
691     dloc[prow]  = nzi;
692     fm          = fill[n];
693     while (nzf--) {
694       *xi++   = fm - shift;
695       *flev++ = im[fm];
696       fm      = fill[fm];
697     }
698   }
699   PetscFree(ajfill);
700   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
701   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
702   ierr = ISDestroy(isicol); CHKERRQ(ierr);
703   PetscFree(fill); PetscFree(im);
704 
705   {
706     double af = ((double)ainew[n])/((double)ai[n]);
707     PLogInfo(A,"Info:MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
708              realloc,f,af);
709     PLogInfo(A,"Info:MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
710     PLogInfo(A,"Info:MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
711     PLogInfo(A,"Info:MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
712   }
713 
714   /* put together the new matrix */
715   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr);
716   b = (Mat_SeqAIJ *) (*fact)->data;
717   PetscFree(b->imax);
718   b->singlemalloc = 0;
719   len = (ainew[n] + shift)*sizeof(Scalar);
720   /* the next line frees the default space generated by the Create() */
721   PetscFree(b->a); PetscFree(b->ilen);
722   b->a          = (Scalar *) PetscMalloc( len+1 ); CHKPTRQ(b->a);
723   b->j          = ajnew;
724   b->i          = ainew;
725   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
726   b->diag       = dloc;
727   b->ilen       = 0;
728   b->imax       = 0;
729   b->row        = isrow;
730   b->col        = iscol;
731   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));
732   CHKPTRQ(b->solve_work);
733   /* In b structure:  Free imax, ilen, old a, old j.
734      Allocate dloc, solve_work, new a, new j */
735   PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar)));
736   b->maxnz          = b->nz = ainew[n] + shift;
737   (*fact)->factor   = FACTOR_LU;
738 
739   (*fact)->info.factor_mallocs    = realloc;
740   (*fact)->info.fill_ratio_given  = f;
741   (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]);
742 
743   return 0;
744 }
745 
746 
747 
748 
749