xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision d77bb2e1bd18432b9760a5a427628540258117e0)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: aijfact.c,v 1.95 1998/03/12 23:18:23 bsmith Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/dot.h"
8 
9 #undef __FUNC__
10 #define __FUNC__ "MatOrder_Flow_SeqAIJ"
11 int MatOrder_Flow_SeqAIJ(Mat mat,MatReorderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,0,"Code not written");
16 #if !defined(USE_PETSC_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 /*
22     Factorization code for AIJ format.
23 */
24 #undef __FUNC__
25 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ"
26 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B)
27 {
28   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
29   IS         isicol;
30   int        *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j;
31   int        *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift;
32   int        *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im;
33 
34   PetscFunctionBegin;
35   PetscValidHeaderSpecific(isrow,IS_COOKIE);
36   PetscValidHeaderSpecific(iscol,IS_COOKIE);
37 
38   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
39   ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic);
40 
41   /* get new row pointers */
42   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
43   ainew[0] = -shift;
44   /* don't know how many column pointers are needed so estimate */
45   jmax = (int) (f*ai[n]+(!shift));
46   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
47   /* fill is a linked list of nonzeros in active row */
48   fill = (int *) PetscMalloc( (2*n+1)*sizeof(int)); CHKPTRQ(fill);
49   im = fill + n + 1;
50   /* idnew is location of diagonal in factor */
51   idnew = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(idnew);
52   idnew[0] = -shift;
53 
54   for ( i=0; i<n; i++ ) {
55     /* first copy previous fill into linked list */
56     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
57     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
58     ajtmp   = aj + ai[r[i]] + shift;
59     fill[n] = n;
60     while (nz--) {
61       fm  = n;
62       idx = ic[*ajtmp++ + shift];
63       do {
64         m  = fm;
65         fm = fill[m];
66       } while (fm < idx);
67       fill[m]   = idx;
68       fill[idx] = fm;
69     }
70     row = fill[n];
71     while ( row < i ) {
72       ajtmp = ajnew + idnew[row] + (!shift);
73       nzbd  = 1 + idnew[row] - ainew[row];
74       nz    = im[row] - nzbd;
75       fm    = row;
76       while (nz-- > 0) {
77         idx = *ajtmp++ + shift;
78         nzbd++;
79         if (idx == i) im[row] = nzbd;
80         do {
81           m  = fm;
82           fm = fill[m];
83         } while (fm < idx);
84         if (fm != idx) {
85           fill[m]   = idx;
86           fill[idx] = fm;
87           fm        = idx;
88           nnz++;
89         }
90       }
91       row = fill[row];
92     }
93     /* copy new filled row into permanent storage */
94     ainew[i+1] = ainew[i] + nnz;
95     if (ainew[i+1] > jmax) {
96       /* allocate a longer ajnew */
97       int maxadd;
98       maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n);
99       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
100       jmax += maxadd;
101       ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp);
102       PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));
103       PetscFree(ajnew);
104       ajnew = ajtmp;
105       realloc++; /* count how many times we realloc */
106     }
107     ajtmp = ajnew + ainew[i] + shift;
108     fm    = fill[n];
109     nzi   = 0;
110     im[i] = nnz;
111     while (nnz--) {
112       if (fm < i) nzi++;
113       *ajtmp++ = fm - shift;
114       fm       = fill[fm];
115     }
116     idnew[i] = ainew[i] + nzi;
117   }
118   if (ai[n] != 0) {
119     double af = ((double)ainew[n])/((double)ai[n]);
120     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
121              realloc,f,af);
122     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
123     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
124     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
125   } else {
126     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
127   }
128 
129   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
130   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
131 
132   PetscFree(fill);
133 
134   /* put together the new matrix */
135   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B); CHKERRQ(ierr);
136   PLogObjectParent(*B,isicol);
137   b = (Mat_SeqAIJ *) (*B)->data;
138   PetscFree(b->imax);
139   b->singlemalloc = 0;
140   /* the next line frees the default space generated by the Create() */
141   PetscFree(b->a); PetscFree(b->ilen);
142   b->a          = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a);
143   b->j          = ajnew;
144   b->i          = ainew;
145   b->diag       = idnew;
146   b->ilen       = 0;
147   b->imax       = 0;
148   b->row        = isrow;
149   b->col        = iscol;
150   b->icol       = isicol;
151   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
152   /* In b structure:  Free imax, ilen, old a, old j.
153      Allocate idnew, solve_work, new a, new j */
154   PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar)));
155   b->maxnz = b->nz = ainew[n] + shift;
156 
157   (*B)->info.factor_mallocs    = realloc;
158   (*B)->info.fill_ratio_given  = f;
159   if (ai[i] != 0) {
160     (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[i]);
161   } else {
162     (*B)->info.fill_ratio_needed = 0.0;
163   }
164 
165   PetscFunctionReturn(0);
166 }
167 /* ----------------------------------------------------------- */
168 int Mat_AIJ_CheckInode(Mat);
169 
170 #undef __FUNC__
171 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ"
172 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
173 {
174   Mat        C = *B;
175   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data;
176   IS         isrow = b->row, isicol = b->icol;
177   int        *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j;
178   int        *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift;
179   int        *diag_offset = b->diag,diag,k;
180   int        preserve_row_sums = (int) a->ilu_preserve_row_sums;
181   register   int    *pj;
182   Scalar     *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0;
183   double     ssum;
184   register   Scalar *pv, *rtmps,*u_values;
185 
186   PetscFunctionBegin;
187 
188   ierr  = ISGetIndices(isrow,&r); CHKERRQ(ierr);
189   ierr  = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
190   rtmp  = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) ); CHKPTRQ(rtmp);
191   PetscMemzero(rtmp,(n+1)*sizeof(Scalar));
192   rtmps = rtmp + shift; ics = ic + shift;
193 
194   /* precalcuate row sums */
195   if (preserve_row_sums) {
196     rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) ); CHKPTRQ(rowsums);
197     for ( i=0; i<n; i++ ) {
198       nz  = a->i[r[i]+1] - a->i[r[i]];
199       v   = a->a + a->i[r[i]] + shift;
200       sum = 0.0;
201       for ( j=0; j<nz; j++ ) sum += v[j];
202       rowsums[i] = sum;
203     }
204   }
205 
206   for ( i=0; i<n; i++ ) {
207     nz    = ai[i+1] - ai[i];
208     ajtmp = aj + ai[i] + shift;
209     for  ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0;
210 
211     /* load in initial (unfactored row) */
212     nz       = a->i[r[i]+1] - a->i[r[i]];
213     ajtmpold = a->j + a->i[r[i]] + shift;
214     v        = a->a + a->i[r[i]] + shift;
215     for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] =  v[j];
216 
217     row = *ajtmp++ + shift;
218       while  (row < i ) {
219       pc = rtmp + row;
220       if (*pc != 0.0) {
221         pv         = b->a + diag_offset[row] + shift;
222         pj         = b->j + diag_offset[row] + (!shift);
223         multiplier = *pc / *pv++;
224         *pc        = multiplier;
225         nz         = ai[row+1] - diag_offset[row] - 1;
226         for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
227         PLogFlops(2*nz);
228       }
229       row = *ajtmp++ + shift;
230     }
231     /* finished row so stick it into b->a */
232     pv = b->a + ai[i] + shift;
233     pj = b->j + ai[i] + shift;
234     nz = ai[i+1] - ai[i];
235     for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];}
236     diag = diag_offset[i] - ai[i];
237     /*
238           Possibly adjust diagonal entry on current row to force
239         LU matrix to have same row sum as initial matrix.
240     */
241     if (pv[diag] == 0.0) {
242       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot");
243     }
244     if (preserve_row_sums) {
245       pj  = b->j + ai[i] + shift;
246       sum = rowsums[i];
247       for ( j=0; j<diag; j++ ) {
248         u_values  = b->a + diag_offset[pj[j]] + shift;
249         nz        = ai[pj[j]+1] - diag_offset[pj[j]];
250         inner_sum = 0.0;
251         for ( k=0; k<nz; k++ ) {
252           inner_sum += u_values[k];
253         }
254         sum -= pv[j]*inner_sum;
255 
256       }
257       nz       = ai[i+1] - diag_offset[i] - 1;
258       u_values = b->a + diag_offset[i] + 1 + shift;
259       for ( k=0; k<nz; k++ ) {
260         sum -= u_values[k];
261       }
262       ssum = PetscAbsScalar(sum/pv[diag]);
263       if (ssum < 1000. && ssum > .001) pv[diag] = sum;
264     }
265     /* check pivot entry for current row */
266   }
267 
268   /* invert diagonal entries for simplier triangular solves */
269   for ( i=0; i<n; i++ ) {
270     b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
271   }
272 
273   if (preserve_row_sums) PetscFree(rowsums);
274   PetscFree(rtmp);
275   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
276   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
277   C->factor = FACTOR_LU;
278   ierr = Mat_AIJ_CheckInode(C); CHKERRQ(ierr);
279   C->assembled = PETSC_TRUE;
280   PLogFlops(b->n);
281   PetscFunctionReturn(0);
282 }
283 /* ----------------------------------------------------------- */
284 #undef __FUNC__
285 #define __FUNC__ "MatLUFactor_SeqAIJ"
286 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f)
287 {
288   Mat_SeqAIJ     *mat = (Mat_SeqAIJ *) A->data;
289   int            ierr;
290   Mat            C;
291   PetscOps       *Abops;
292   struct _MatOps *Aops;
293 
294   PetscFunctionBegin;
295   ierr = MatLUFactorSymbolic(A,row,col,f,&C); CHKERRQ(ierr);
296   ierr = MatLUFactorNumeric(A,&C); CHKERRQ(ierr);
297 
298   /* free all the data structures from mat */
299   PetscFree(mat->a);
300   if (!mat->singlemalloc) {PetscFree(mat->i); PetscFree(mat->j);}
301   if (mat->diag) PetscFree(mat->diag);
302   if (mat->ilen) PetscFree(mat->ilen);
303   if (mat->imax) PetscFree(mat->imax);
304   if (mat->solve_work) PetscFree(mat->solve_work);
305   if (mat->inode.size) PetscFree(mat->inode.size);
306   PetscFree(mat);
307 
308   /*
309        This is horrible, horrible code. We need to keep the
310     A pointers for the bops and ops but copy everything
311     else from C.
312   */
313   Abops = A->bops;
314   Aops  = A->ops;
315   PetscMemcpy(A,C,sizeof(struct _p_Mat));
316   A->bops = Abops;
317   A->ops  = Aops;
318 
319   PetscHeaderDestroy(C);
320   PetscFunctionReturn(0);
321 }
322 /* ----------------------------------------------------------- */
323 #undef __FUNC__
324 #define __FUNC__ "MatSolve_SeqAIJ"
325 int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx)
326 {
327   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
328   IS         iscol = a->col, isrow = a->row;
329   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
330   int        nz,shift = a->indexshift,*rout,*cout;
331   Scalar     *x,*b,*tmp, *tmps, *aa = a->a, sum, *v;
332 
333   PetscFunctionBegin;
334   if (!n) PetscFunctionReturn(0);
335 
336   VecGetArray_Fast(bb,b);
337   VecGetArray_Fast(xx,x);
338   tmp  = a->solve_work;
339 
340   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
341   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
342 
343   /* forward solve the lower triangular */
344   tmp[0] = b[*r++];
345   tmps   = tmp + shift;
346   for ( i=1; i<n; i++ ) {
347     v   = aa + ai[i] + shift;
348     vi  = aj + ai[i] + shift;
349     nz  = a->diag[i] - ai[i];
350     sum = b[*r++];
351     while (nz--) sum -= *v++ * tmps[*vi++];
352     tmp[i] = sum;
353   }
354 
355   /* backward solve the upper triangular */
356   for ( i=n-1; i>=0; i-- ){
357     v   = aa + a->diag[i] + (!shift);
358     vi  = aj + a->diag[i] + (!shift);
359     nz  = ai[i+1] - a->diag[i] - 1;
360     sum = tmp[i];
361     while (nz--) sum -= *v++ * tmps[*vi++];
362     x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
363   }
364 
365   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
366   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
367   PLogFlops(2*a->nz - a->n);
368   PetscFunctionReturn(0);
369 }
370 
371 /* ----------------------------------------------------------- */
372 #undef __FUNC__
373 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering"
374 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx)
375 {
376   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
377   int        n = a->m, *ai = a->i, *aj = a->j, *adiag = a->diag,ierr;
378   Scalar     *x,*b, *aa = a->a, sum;
379 #if !defined(USE_FORTRAN_KERNELS)
380   int        adiag_i,i,*vi,nz,ai_i;
381   Scalar     *v;
382 #endif
383 
384   PetscFunctionBegin;
385   if (!n) PetscFunctionReturn(0);
386   if (a->indexshift) {
387      ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr);
388      PetscFunctionReturn(0);
389   }
390 
391   VecGetArray_Fast(bb,b);
392   VecGetArray_Fast(xx,x);
393 
394 #if defined(USE_FORTRAN_KERNELS)
395   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
396 #else
397   /* forward solve the lower triangular */
398   x[0] = b[0];
399   for ( i=1; i<n; i++ ) {
400     ai_i = ai[i];
401     v    = aa + ai_i;
402     vi   = aj + ai_i;
403     nz   = adiag[i] - ai_i;
404     sum  = b[i];
405     while (nz--) sum -= *v++ * x[*vi++];
406     x[i] = sum;
407   }
408 
409   /* backward solve the upper triangular */
410   for ( i=n-1; i>=0; i-- ){
411     adiag_i = adiag[i];
412     v       = aa + adiag_i + 1;
413     vi      = aj + adiag_i + 1;
414     nz      = ai[i+1] - adiag_i - 1;
415     sum     = x[i];
416     while (nz--) sum -= *v++ * x[*vi++];
417     x[i]    = sum*aa[adiag_i];
418   }
419 #endif
420   PLogFlops(2*a->nz - a->n);
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNC__
425 #define __FUNC__ "MatSolveAdd_SeqAIJ"
426 int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx)
427 {
428   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
429   IS         iscol = a->col, isrow = a->row;
430   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
431   int        nz, shift = a->indexshift,*rout,*cout;
432   Scalar     *x,*b,*tmp, *aa = a->a, sum, *v;
433 
434   PetscFunctionBegin;
435   if (yy != xx) {ierr = VecCopy(yy,xx); CHKERRQ(ierr);}
436 
437   VecGetArray_Fast(bb,b);
438   VecGetArray_Fast(xx,x);
439   tmp  = a->solve_work;
440 
441   ierr = ISGetIndices(isrow,&rout); CHKERRQ(ierr); r = rout;
442   ierr = ISGetIndices(iscol,&cout); CHKERRQ(ierr); c = cout + (n-1);
443 
444   /* forward solve the lower triangular */
445   tmp[0] = b[*r++];
446   for ( i=1; i<n; i++ ) {
447     v   = aa + ai[i] + shift;
448     vi  = aj + ai[i] + shift;
449     nz  = a->diag[i] - ai[i];
450     sum = b[*r++];
451     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
452     tmp[i] = sum;
453   }
454 
455   /* backward solve the upper triangular */
456   for ( i=n-1; i>=0; i-- ){
457     v   = aa + a->diag[i] + (!shift);
458     vi  = aj + a->diag[i] + (!shift);
459     nz  = ai[i+1] - a->diag[i] - 1;
460     sum = tmp[i];
461     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
462     tmp[i] = sum*aa[a->diag[i]+shift];
463     x[*c--] += tmp[i];
464   }
465 
466   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
467   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
468   PLogFlops(2*a->nz);
469 
470   PetscFunctionReturn(0);
471 }
472 /* -------------------------------------------------------------------*/
473 #undef __FUNC__
474 #define __FUNC__ "MatSolveTrans_SeqAIJ"
475 int MatSolveTrans_SeqAIJ(Mat A,Vec bb, Vec xx)
476 {
477   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
478   IS         iscol = a->col, isrow = a->row, invisrow,inviscol;
479   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
480   int        nz,shift = a->indexshift,*rout,*cout;
481   Scalar     *x,*b,*tmp, *aa = a->a, *v;
482 
483   PetscFunctionBegin;
484   VecGetArray_Fast(bb,b);
485   VecGetArray_Fast(xx,x);
486   tmp  = a->solve_work;
487 
488   /* invert the permutations */
489   ierr = ISInvertPermutation(isrow,&invisrow); CHKERRQ(ierr);
490   ierr = ISInvertPermutation(iscol,&inviscol); CHKERRQ(ierr);
491 
492   ierr = ISGetIndices(invisrow,&rout); CHKERRQ(ierr); r = rout;
493   ierr = ISGetIndices(inviscol,&cout); CHKERRQ(ierr); c = cout;
494 
495   /* copy the b into temp work space according to permutation */
496   for ( i=0; i<n; i++ ) tmp[c[i]] = b[i];
497 
498   /* forward solve the U^T */
499   for ( i=0; i<n; i++ ) {
500     v   = aa + a->diag[i] + shift;
501     vi  = aj + a->diag[i] + (!shift);
502     nz  = ai[i+1] - a->diag[i] - 1;
503     tmp[i] *= *v++;
504     while (nz--) {
505       tmp[*vi++ + shift] -= (*v++)*tmp[i];
506     }
507   }
508 
509   /* backward solve the L^T */
510   for ( i=n-1; i>=0; i-- ){
511     v   = aa + a->diag[i] - 1 + shift;
512     vi  = aj + a->diag[i] - 1 + shift;
513     nz  = a->diag[i] - ai[i];
514     while (nz--) {
515       tmp[*vi-- + shift] -= (*v--)*tmp[i];
516     }
517   }
518 
519   /* copy tmp into x according to permutation */
520   for ( i=0; i<n; i++ ) x[r[i]] = tmp[i];
521 
522   ierr = ISRestoreIndices(invisrow,&rout); CHKERRQ(ierr);
523   ierr = ISRestoreIndices(inviscol,&cout); CHKERRQ(ierr);
524   ierr = ISDestroy(invisrow); CHKERRQ(ierr);
525   ierr = ISDestroy(inviscol); CHKERRQ(ierr);
526 
527   PLogFlops(2*a->nz-a->n);
528   PetscFunctionReturn(0);
529 }
530 
531 #undef __FUNC__
532 #define __FUNC__ "MatSolveTransAdd_SeqAIJ"
533 int MatSolveTransAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx)
534 {
535   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
536   IS         iscol = a->col, isrow = a->row, invisrow,inviscol;
537   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
538   int        nz,shift = a->indexshift, *rout, *cout;
539   Scalar     *x,*b,*tmp, *aa = a->a, *v;
540 
541   PetscFunctionBegin;
542   if (zz != xx) VecCopy(zz,xx);
543 
544   VecGetArray_Fast(bb,b);
545   VecGetArray_Fast(xx,x);
546   tmp = a->solve_work;
547 
548   /* invert the permutations */
549   ierr = ISInvertPermutation(isrow,&invisrow); CHKERRQ(ierr);
550   ierr = ISInvertPermutation(iscol,&inviscol); CHKERRQ(ierr);
551   ierr = ISGetIndices(invisrow,&rout); CHKERRQ(ierr); r = rout;
552   ierr = ISGetIndices(inviscol,&cout); CHKERRQ(ierr); c = cout;
553 
554   /* copy the b into temp work space according to permutation */
555   for ( i=0; i<n; i++ ) tmp[c[i]] = b[i];
556 
557   /* forward solve the U^T */
558   for ( i=0; i<n; i++ ) {
559     v   = aa + a->diag[i] + shift;
560     vi  = aj + a->diag[i] + (!shift);
561     nz  = ai[i+1] - a->diag[i] - 1;
562     tmp[i] *= *v++;
563     while (nz--) {
564       tmp[*vi++ + shift] -= (*v++)*tmp[i];
565     }
566   }
567 
568   /* backward solve the L^T */
569   for ( i=n-1; i>=0; i-- ){
570     v   = aa + a->diag[i] - 1 + shift;
571     vi  = aj + a->diag[i] - 1 + shift;
572     nz  = a->diag[i] - ai[i];
573     while (nz--) {
574       tmp[*vi-- + shift] -= (*v--)*tmp[i];
575     }
576   }
577 
578   /* copy tmp into x according to permutation */
579   for ( i=0; i<n; i++ ) x[r[i]] += tmp[i];
580 
581   ierr = ISRestoreIndices(invisrow,&rout); CHKERRQ(ierr);
582   ierr = ISRestoreIndices(inviscol,&cout); CHKERRQ(ierr);
583   ierr = ISDestroy(invisrow); CHKERRQ(ierr);
584   ierr = ISDestroy(inviscol); CHKERRQ(ierr);
585 
586   PLogFlops(2*a->nz);
587   PetscFunctionReturn(0);
588 }
589 /* ----------------------------------------------------------------*/
590 
591 #undef __FUNC__
592 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ"
593 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,int levels,Mat *fact)
594 {
595   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
596   IS         isicol;
597   int        *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j;
598   int        *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
599   int        *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0;
600   int        incrlev,nnz,i,shift = a->indexshift;
601   PetscTruth col_identity, row_identity;
602 
603   PetscFunctionBegin;
604   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
605 
606   /* special case that simply copies fill pattern */
607   ISIdentity(isrow,&row_identity); ISIdentity(iscol,&col_identity);
608   if (levels == 0 && row_identity && col_identity) {
609     ierr = MatConvertSameType_SeqAIJ(A,fact,DO_NOT_COPY_VALUES); CHKERRQ(ierr);
610     (*fact)->factor = FACTOR_LU;
611     b               = (Mat_SeqAIJ *) (*fact)->data;
612     if (!b->diag) {
613       ierr = MatMarkDiag_SeqAIJ(*fact); CHKERRQ(ierr);
614     }
615     b->row             = isrow;
616     b->col             = iscol;
617     b->icol            = isicol;
618     b->solve_work      = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
619     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
620     PetscFunctionReturn(0);
621   }
622 
623   ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr);
624   ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
625 
626   /* get new row pointers */
627   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
628   ainew[0] = -shift;
629   /* don't know how many column pointers are needed so estimate */
630   jmax = (int) (f*(ai[n]+!shift));
631   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
632   /* ajfill is level of fill for each fill entry */
633   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill);
634   /* fill is a linked list of nonzeros in active row */
635   fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill);
636   /* im is level for each filled value */
637   im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im);
638   /* dloc is location of diagonal in factor */
639   dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc);
640   dloc[0]  = 0;
641   for ( prow=0; prow<n; prow++ ) {
642     /* first copy previous fill into linked list */
643     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
644     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
645     xi      = aj + ai[r[prow]] + shift;
646     fill[n] = n;
647     while (nz--) {
648       fm  = n;
649       idx = ic[*xi++ + shift];
650       do {
651         m  = fm;
652         fm = fill[m];
653       } while (fm < idx);
654       fill[m]   = idx;
655       fill[idx] = fm;
656       im[idx]   = 0;
657     }
658     nzi = 0;
659     row = fill[n];
660     while ( row < prow ) {
661       incrlev = im[row] + 1;
662       nz      = dloc[row];
663       xi      = ajnew  + ainew[row] + shift + nz;
664       flev    = ajfill + ainew[row] + shift + nz + 1;
665       nnz     = ainew[row+1] - ainew[row] - nz - 1;
666       if (*xi++ + shift != row) {
667         SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot: try running with -pc_ilu_nonzeros_along_diagonal");
668       }
669       fm      = row;
670       while (nnz-- > 0) {
671         idx = *xi++ + shift;
672         if (*flev + incrlev > levels) {
673           flev++;
674           continue;
675         }
676         do {
677           m  = fm;
678           fm = fill[m];
679         } while (fm < idx);
680         if (fm != idx) {
681           im[idx]   = *flev + incrlev;
682           fill[m]   = idx;
683           fill[idx] = fm;
684           fm        = idx;
685           nzf++;
686         }
687         else {
688           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
689         }
690         flev++;
691       }
692       row = fill[row];
693       nzi++;
694     }
695     /* copy new filled row into permanent storage */
696     ainew[prow+1] = ainew[prow] + nzf;
697     if (ainew[prow+1] > jmax-shift) {
698       /* allocate a longer ajnew */
699       int maxadd;
700       maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n);
701       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
702       jmax += maxadd;
703       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
704       PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));
705       PetscFree(ajnew);
706       ajnew = xi;
707       /* allocate a longer ajfill */
708       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
709       PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));
710       PetscFree(ajfill);
711       ajfill = xi;
712       realloc++;
713     }
714     xi          = ajnew + ainew[prow] + shift;
715     flev        = ajfill + ainew[prow] + shift;
716     dloc[prow]  = nzi;
717     fm          = fill[n];
718     while (nzf--) {
719       *xi++   = fm - shift;
720       *flev++ = im[fm];
721       fm      = fill[fm];
722     }
723   }
724   PetscFree(ajfill);
725   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
726   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
727   PetscFree(fill); PetscFree(im);
728 
729   {
730     double af = ((double)ainew[n])/((double)ai[n]);
731     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
732              realloc,f,af);
733     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
734     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
735     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
736   }
737 
738   /* put together the new matrix */
739   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr);
740   b = (Mat_SeqAIJ *) (*fact)->data;
741   PetscFree(b->imax);
742   b->singlemalloc = 0;
743   len = (ainew[n] + shift)*sizeof(Scalar);
744   /* the next line frees the default space generated by the Create() */
745   PetscFree(b->a); PetscFree(b->ilen);
746   b->a          = (Scalar *) PetscMalloc( len+1 ); CHKPTRQ(b->a);
747   b->j          = ajnew;
748   b->i          = ainew;
749   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
750   b->diag       = dloc;
751   b->ilen       = 0;
752   b->imax       = 0;
753   b->row        = isrow;
754   b->col        = iscol;
755   b->icol       = isicol;
756   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar)); CHKPTRQ(b->solve_work);
757   /* In b structure:  Free imax, ilen, old a, old j.
758      Allocate dloc, solve_work, new a, new j */
759   PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar)));
760   b->maxnz          = b->nz = ainew[n] + shift;
761   (*fact)->factor   = FACTOR_LU;
762 
763   (*fact)->info.factor_mallocs    = realloc;
764   (*fact)->info.fill_ratio_given  = f;
765   (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]);
766 
767   PetscFunctionReturn(0);
768 }
769 
770 
771 
772 
773