xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision c31ba22a230fe2c23596c66917081dfec3ccf152)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: aijfact.c,v 1.94 1998/03/06 00:14:28 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/dot.h"
8 
9 #undef __FUNC__
10 #define __FUNC__ "MatOrder_Flow_SeqAIJ"
11 int MatOrder_Flow_SeqAIJ(Mat mat,MatReorderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,0,"Code not written");
16 #if !defined(USE_PETSC_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 /*
22     Factorization code for AIJ format.
23 */
24 #undef __FUNC__
25 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ"
26 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B)
27 {
28   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
29   IS         isicol;
30   int        *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j;
31   int        *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift;
32   int        *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im;
33 
34   PetscFunctionBegin;
35   PetscValidHeaderSpecific(isrow,IS_COOKIE);
36   PetscValidHeaderSpecific(iscol,IS_COOKIE);
37 
38   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
39   ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic);
40 
41   /* get new row pointers */
42   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
43   ainew[0] = -shift;
44   /* don't know how many column pointers are needed so estimate */
45   jmax = (int) (f*ai[n]+(!shift));
46   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
47   /* fill is a linked list of nonzeros in active row */
48   fill = (int *) PetscMalloc( (2*n+1)*sizeof(int)); CHKPTRQ(fill);
49   im = fill + n + 1;
50   /* idnew is location of diagonal in factor */
51   idnew = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(idnew);
52   idnew[0] = -shift;
53 
54   for ( i=0; i<n; i++ ) {
55     /* first copy previous fill into linked list */
56     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
57     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
58     ajtmp   = aj + ai[r[i]] + shift;
59     fill[n] = n;
60     while (nz--) {
61       fm  = n;
62       idx = ic[*ajtmp++ + shift];
63       do {
64         m  = fm;
65         fm = fill[m];
66       } while (fm < idx);
67       fill[m]   = idx;
68       fill[idx] = fm;
69     }
70     row = fill[n];
71     while ( row < i ) {
72       ajtmp = ajnew + idnew[row] + (!shift);
73       nzbd  = 1 + idnew[row] - ainew[row];
74       nz    = im[row] - nzbd;
75       fm    = row;
76       while (nz-- > 0) {
77         idx = *ajtmp++ + shift;
78         nzbd++;
79         if (idx == i) im[row] = nzbd;
80         do {
81           m  = fm;
82           fm = fill[m];
83         } while (fm < idx);
84         if (fm != idx) {
85           fill[m]   = idx;
86           fill[idx] = fm;
87           fm        = idx;
88           nnz++;
89         }
90       }
91       row = fill[row];
92     }
93     /* copy new filled row into permanent storage */
94     ainew[i+1] = ainew[i] + nnz;
95     if (ainew[i+1] > jmax) {
96       /* allocate a longer ajnew */
97       int maxadd;
98       maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n);
99       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
100       jmax += maxadd;
101       ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp);
102       PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));
103       PetscFree(ajnew);
104       ajnew = ajtmp;
105       realloc++; /* count how many times we realloc */
106     }
107     ajtmp = ajnew + ainew[i] + shift;
108     fm    = fill[n];
109     nzi   = 0;
110     im[i] = nnz;
111     while (nnz--) {
112       if (fm < i) nzi++;
113       *ajtmp++ = fm - shift;
114       fm       = fill[fm];
115     }
116     idnew[i] = ainew[i] + nzi;
117   }
118   if (ai[n] != 0) {
119     double af = ((double)ainew[n])/((double)ai[n]);
120     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
121              realloc,f,af);
122     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
123     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
124     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
125   } else {
126     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
127   }
128 
129   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
130   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
131 
132   PetscFree(fill);
133 
134   /* put together the new matrix */
135   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B); CHKERRQ(ierr);
136   PLogObjectParent(*B,isicol);
137   b = (Mat_SeqAIJ *) (*B)->data;
138   PetscFree(b->imax);
139   b->singlemalloc = 0;
140   /* the next line frees the default space generated by the Create() */
141   PetscFree(b->a); PetscFree(b->ilen);
142   b->a          = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a);
143   b->j          = ajnew;
144   b->i          = ainew;
145   b->diag       = idnew;
146   b->ilen       = 0;
147   b->imax       = 0;
148   b->row        = isrow;
149   b->col        = iscol;
150   b->icol       = isicol;
151   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
152   /* In b structure:  Free imax, ilen, old a, old j.
153      Allocate idnew, solve_work, new a, new j */
154   PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar)));
155   b->maxnz = b->nz = ainew[n] + shift;
156 
157   (*B)->info.factor_mallocs    = realloc;
158   (*B)->info.fill_ratio_given  = f;
159   if (ai[i] != 0) {
160     (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[i]);
161   } else {
162     (*B)->info.fill_ratio_needed = 0.0;
163   }
164 
165   PetscFunctionReturn(0);
166 }
167 /* ----------------------------------------------------------- */
168 int Mat_AIJ_CheckInode(Mat);
169 
170 #undef __FUNC__
171 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ"
172 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
173 {
174   Mat        C = *B;
175   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data;
176   IS         isrow = b->row, isicol = b->icol;
177   int        *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j;
178   int        *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift;
179   int        *diag_offset = b->diag,diag,k;
180   int        preserve_row_sums = (int) a->ilu_preserve_row_sums;
181   register   int    *pj;
182   Scalar     *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0;
183   double     ssum;
184   register   Scalar *pv, *rtmps,*u_values;
185 
186   PetscFunctionBegin;
187 
188   ierr  = ISGetIndices(isrow,&r); CHKERRQ(ierr);
189   ierr  = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
190   rtmp  = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) ); CHKPTRQ(rtmp);
191   PetscMemzero(rtmp,(n+1)*sizeof(Scalar));
192   rtmps = rtmp + shift; ics = ic + shift;
193 
194   /* precalcuate row sums */
195   if (preserve_row_sums) {
196     rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) ); CHKPTRQ(rowsums);
197     for ( i=0; i<n; i++ ) {
198       nz  = a->i[r[i]+1] - a->i[r[i]];
199       v   = a->a + a->i[r[i]] + shift;
200       sum = 0.0;
201       for ( j=0; j<nz; j++ ) sum += v[j];
202       rowsums[i] = sum;
203     }
204   }
205 
206   for ( i=0; i<n; i++ ) {
207     nz    = ai[i+1] - ai[i];
208     ajtmp = aj + ai[i] + shift;
209     for  ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0;
210 
211     /* load in initial (unfactored row) */
212     nz       = a->i[r[i]+1] - a->i[r[i]];
213     ajtmpold = a->j + a->i[r[i]] + shift;
214     v        = a->a + a->i[r[i]] + shift;
215     for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] =  v[j];
216 
217     row = *ajtmp++ + shift;
218       while  (row < i ) {
219       pc = rtmp + row;
220       if (*pc != 0.0) {
221         pv         = b->a + diag_offset[row] + shift;
222         pj         = b->j + diag_offset[row] + (!shift);
223         multiplier = *pc / *pv++;
224         *pc        = multiplier;
225         nz         = ai[row+1] - diag_offset[row] - 1;
226         for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
227         PLogFlops(2*nz);
228       }
229       row = *ajtmp++ + shift;
230     }
231     /* finished row so stick it into b->a */
232     pv = b->a + ai[i] + shift;
233     pj = b->j + ai[i] + shift;
234     nz = ai[i+1] - ai[i];
235     for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];}
236     diag = diag_offset[i] - ai[i];
237     /*
238           Possibly adjust diagonal entry on current row to force
239         LU matrix to have same row sum as initial matrix.
240     */
241     if (pv[diag] == 0.0) {
242       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot");
243     }
244     if (preserve_row_sums) {
245       pj  = b->j + ai[i] + shift;
246       sum = rowsums[i];
247       for ( j=0; j<diag; j++ ) {
248         u_values  = b->a + diag_offset[pj[j]] + shift;
249         nz        = ai[pj[j]+1] - diag_offset[pj[j]];
250         inner_sum = 0.0;
251         for ( k=0; k<nz; k++ ) {
252           inner_sum += u_values[k];
253         }
254         sum -= pv[j]*inner_sum;
255 
256       }
257       nz       = ai[i+1] - diag_offset[i] - 1;
258       u_values = b->a + diag_offset[i] + 1 + shift;
259       for ( k=0; k<nz; k++ ) {
260         sum -= u_values[k];
261       }
262       ssum = PetscAbsScalar(sum/pv[diag]);
263       if (ssum < 1000. && ssum > .001) pv[diag] = sum;
264     }
265     /* check pivot entry for current row */
266   }
267 
268   /* invert diagonal entries for simplier triangular solves */
269   for ( i=0; i<n; i++ ) {
270     b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
271   }
272 
273   if (preserve_row_sums) PetscFree(rowsums);
274   PetscFree(rtmp);
275   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
276   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
277   C->factor = FACTOR_LU;
278   ierr = Mat_AIJ_CheckInode(C); CHKERRQ(ierr);
279   C->assembled = PETSC_TRUE;
280   PLogFlops(b->n);
281   PetscFunctionReturn(0);
282 }
283 /* ----------------------------------------------------------- */
284 #undef __FUNC__
285 #define __FUNC__ "MatLUFactor_SeqAIJ"
286 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f)
287 {
288   Mat_SeqAIJ     *mat = (Mat_SeqAIJ *) A->data;
289   int            ierr;
290   Mat            C;
291   PetscOps       *Abops;
292   struct _MatOps *Aops;
293 
294   PetscFunctionBegin;
295   ierr = MatLUFactorSymbolic(A,row,col,f,&C); CHKERRQ(ierr);
296   ierr = MatLUFactorNumeric(A,&C); CHKERRQ(ierr);
297 
298   /* free all the data structures from mat */
299   PetscFree(mat->a);
300   if (!mat->singlemalloc) {PetscFree(mat->i); PetscFree(mat->j);}
301   if (mat->diag) PetscFree(mat->diag);
302   if (mat->ilen) PetscFree(mat->ilen);
303   if (mat->imax) PetscFree(mat->imax);
304   if (mat->solve_work) PetscFree(mat->solve_work);
305   if (mat->inode.size) PetscFree(mat->inode.size);
306   PetscFree(mat);
307 
308   /*
309        This is horrible, horrible code. We need to keep the
310     A pointers for the bops and ops but copy everything
311     else from C.
312   */
313   Abops = A->bops;
314   Aops  = A->ops;
315   PetscMemcpy(A,C,sizeof(struct _p_Mat));
316   A->bops = Abops;
317   A->ops  = Aops;
318 
319   PetscHeaderDestroy(C);
320   PetscFunctionReturn(0);
321 }
322 /* ----------------------------------------------------------- */
323 #undef __FUNC__
324 #define __FUNC__ "MatSolve_SeqAIJ"
325 int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx)
326 {
327   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
328   IS         iscol = a->col, isrow = a->row;
329   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
330   int        nz,shift = a->indexshift,*rout,*cout;
331   Scalar     *x,*b,*tmp, *tmps, *aa = a->a, sum, *v;
332 
333   PetscFunctionBegin;
334   if (!n) PetscFunctionReturn(0);
335 
336   VecGetArray_Fast(bb,b);
337   VecGetArray_Fast(xx,x);
338   tmp  = a->solve_work;
339 
340   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
341   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
342 
343   /* forward solve the lower triangular */
344   tmp[0] = b[*r++];
345   tmps   = tmp + shift;
346   for ( i=1; i<n; i++ ) {
347     v   = aa + ai[i] + shift;
348     vi  = aj + ai[i] + shift;
349     nz  = a->diag[i] - ai[i];
350     sum = b[*r++];
351     while (nz--) sum -= *v++ * tmps[*vi++];
352     tmp[i] = sum;
353   }
354 
355   /* backward solve the upper triangular */
356   for ( i=n-1; i>=0; i-- ){
357     v   = aa + a->diag[i] + (!shift);
358     vi  = aj + a->diag[i] + (!shift);
359     nz  = ai[i+1] - a->diag[i] - 1;
360     sum = tmp[i];
361     while (nz--) sum -= *v++ * tmps[*vi++];
362     x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
363   }
364 
365   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
366   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
367   PLogFlops(2*a->nz - a->n);
368   PetscFunctionReturn(0);
369 }
370 
371 /* ----------------------------------------------------------- */
372 #undef __FUNC__
373 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering"
374 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx)
375 {
376   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
377   int        i,  n = a->m, *vi, *ai = a->i, *aj = a->j,nz, *adiag = a->diag;
378   int        ai_i, adiag_i,ierr;
379   Scalar     *x,*b, *aa = a->a, sum, *v;
380 
381   PetscFunctionBegin;
382   if (!n) PetscFunctionReturn(0);
383   if (a->indexshift) {
384      ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr);
385      PetscFunctionReturn(0);
386   }
387 
388   VecGetArray_Fast(bb,b);
389   VecGetArray_Fast(xx,x);
390 
391 #if defined(USE_FORTRAN_KERNELS)
392   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
393 #else
394   /* forward solve the lower triangular */
395   x[0] = b[0];
396   for ( i=1; i<n; i++ ) {
397     ai_i = ai[i];
398     v    = aa + ai_i;
399     vi   = aj + ai_i;
400     nz   = adiag[i] - ai_i;
401     sum  = b[i];
402     while (nz--) sum -= *v++ * x[*vi++];
403     x[i] = sum;
404   }
405 
406   /* backward solve the upper triangular */
407   for ( i=n-1; i>=0; i-- ){
408     adiag_i = adiag[i];
409     v       = aa + adiag_i + 1;
410     vi      = aj + adiag_i + 1;
411     nz      = ai[i+1] - adiag_i - 1;
412     sum     = x[i];
413     while (nz--) sum -= *v++ * x[*vi++];
414     x[i]    = sum*aa[adiag_i];
415   }
416 #endif
417   PLogFlops(2*a->nz - a->n);
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNC__
422 #define __FUNC__ "MatSolveAdd_SeqAIJ"
423 int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx)
424 {
425   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
426   IS         iscol = a->col, isrow = a->row;
427   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
428   int        nz, shift = a->indexshift,*rout,*cout;
429   Scalar     *x,*b,*tmp, *aa = a->a, sum, *v;
430 
431   PetscFunctionBegin;
432   if (yy != xx) {ierr = VecCopy(yy,xx); CHKERRQ(ierr);}
433 
434   VecGetArray_Fast(bb,b);
435   VecGetArray_Fast(xx,x);
436   tmp  = a->solve_work;
437 
438   ierr = ISGetIndices(isrow,&rout); CHKERRQ(ierr); r = rout;
439   ierr = ISGetIndices(iscol,&cout); CHKERRQ(ierr); c = cout + (n-1);
440 
441   /* forward solve the lower triangular */
442   tmp[0] = b[*r++];
443   for ( i=1; i<n; i++ ) {
444     v   = aa + ai[i] + shift;
445     vi  = aj + ai[i] + shift;
446     nz  = a->diag[i] - ai[i];
447     sum = b[*r++];
448     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
449     tmp[i] = sum;
450   }
451 
452   /* backward solve the upper triangular */
453   for ( i=n-1; i>=0; i-- ){
454     v   = aa + a->diag[i] + (!shift);
455     vi  = aj + a->diag[i] + (!shift);
456     nz  = ai[i+1] - a->diag[i] - 1;
457     sum = tmp[i];
458     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
459     tmp[i] = sum*aa[a->diag[i]+shift];
460     x[*c--] += tmp[i];
461   }
462 
463   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
464   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
465   PLogFlops(2*a->nz);
466 
467   PetscFunctionReturn(0);
468 }
469 /* -------------------------------------------------------------------*/
470 #undef __FUNC__
471 #define __FUNC__ "MatSolveTrans_SeqAIJ"
472 int MatSolveTrans_SeqAIJ(Mat A,Vec bb, Vec xx)
473 {
474   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
475   IS         iscol = a->col, isrow = a->row, invisrow,inviscol;
476   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
477   int        nz,shift = a->indexshift,*rout,*cout;
478   Scalar     *x,*b,*tmp, *aa = a->a, *v;
479 
480   PetscFunctionBegin;
481   VecGetArray_Fast(bb,b);
482   VecGetArray_Fast(xx,x);
483   tmp  = a->solve_work;
484 
485   /* invert the permutations */
486   ierr = ISInvertPermutation(isrow,&invisrow); CHKERRQ(ierr);
487   ierr = ISInvertPermutation(iscol,&inviscol); CHKERRQ(ierr);
488 
489   ierr = ISGetIndices(invisrow,&rout); CHKERRQ(ierr); r = rout;
490   ierr = ISGetIndices(inviscol,&cout); CHKERRQ(ierr); c = cout;
491 
492   /* copy the b into temp work space according to permutation */
493   for ( i=0; i<n; i++ ) tmp[c[i]] = b[i];
494 
495   /* forward solve the U^T */
496   for ( i=0; i<n; i++ ) {
497     v   = aa + a->diag[i] + shift;
498     vi  = aj + a->diag[i] + (!shift);
499     nz  = ai[i+1] - a->diag[i] - 1;
500     tmp[i] *= *v++;
501     while (nz--) {
502       tmp[*vi++ + shift] -= (*v++)*tmp[i];
503     }
504   }
505 
506   /* backward solve the L^T */
507   for ( i=n-1; i>=0; i-- ){
508     v   = aa + a->diag[i] - 1 + shift;
509     vi  = aj + a->diag[i] - 1 + shift;
510     nz  = a->diag[i] - ai[i];
511     while (nz--) {
512       tmp[*vi-- + shift] -= (*v--)*tmp[i];
513     }
514   }
515 
516   /* copy tmp into x according to permutation */
517   for ( i=0; i<n; i++ ) x[r[i]] = tmp[i];
518 
519   ierr = ISRestoreIndices(invisrow,&rout); CHKERRQ(ierr);
520   ierr = ISRestoreIndices(inviscol,&cout); CHKERRQ(ierr);
521   ierr = ISDestroy(invisrow); CHKERRQ(ierr);
522   ierr = ISDestroy(inviscol); CHKERRQ(ierr);
523 
524   PLogFlops(2*a->nz-a->n);
525   PetscFunctionReturn(0);
526 }
527 
528 #undef __FUNC__
529 #define __FUNC__ "MatSolveTransAdd_SeqAIJ"
530 int MatSolveTransAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx)
531 {
532   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
533   IS         iscol = a->col, isrow = a->row, invisrow,inviscol;
534   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
535   int        nz,shift = a->indexshift, *rout, *cout;
536   Scalar     *x,*b,*tmp, *aa = a->a, *v;
537 
538   PetscFunctionBegin;
539   if (zz != xx) VecCopy(zz,xx);
540 
541   VecGetArray_Fast(bb,b);
542   VecGetArray_Fast(xx,x);
543   tmp = a->solve_work;
544 
545   /* invert the permutations */
546   ierr = ISInvertPermutation(isrow,&invisrow); CHKERRQ(ierr);
547   ierr = ISInvertPermutation(iscol,&inviscol); CHKERRQ(ierr);
548   ierr = ISGetIndices(invisrow,&rout); CHKERRQ(ierr); r = rout;
549   ierr = ISGetIndices(inviscol,&cout); CHKERRQ(ierr); c = cout;
550 
551   /* copy the b into temp work space according to permutation */
552   for ( i=0; i<n; i++ ) tmp[c[i]] = b[i];
553 
554   /* forward solve the U^T */
555   for ( i=0; i<n; i++ ) {
556     v   = aa + a->diag[i] + shift;
557     vi  = aj + a->diag[i] + (!shift);
558     nz  = ai[i+1] - a->diag[i] - 1;
559     tmp[i] *= *v++;
560     while (nz--) {
561       tmp[*vi++ + shift] -= (*v++)*tmp[i];
562     }
563   }
564 
565   /* backward solve the L^T */
566   for ( i=n-1; i>=0; i-- ){
567     v   = aa + a->diag[i] - 1 + shift;
568     vi  = aj + a->diag[i] - 1 + shift;
569     nz  = a->diag[i] - ai[i];
570     while (nz--) {
571       tmp[*vi-- + shift] -= (*v--)*tmp[i];
572     }
573   }
574 
575   /* copy tmp into x according to permutation */
576   for ( i=0; i<n; i++ ) x[r[i]] += tmp[i];
577 
578   ierr = ISRestoreIndices(invisrow,&rout); CHKERRQ(ierr);
579   ierr = ISRestoreIndices(inviscol,&cout); CHKERRQ(ierr);
580   ierr = ISDestroy(invisrow); CHKERRQ(ierr);
581   ierr = ISDestroy(inviscol); CHKERRQ(ierr);
582 
583   PLogFlops(2*a->nz);
584   PetscFunctionReturn(0);
585 }
586 /* ----------------------------------------------------------------*/
587 
588 #undef __FUNC__
589 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ"
590 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,int levels,Mat *fact)
591 {
592   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
593   IS         isicol;
594   int        *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j;
595   int        *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
596   int        *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0;
597   int        incrlev,nnz,i,shift = a->indexshift;
598   PetscTruth col_identity, row_identity;
599 
600   PetscFunctionBegin;
601   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
602 
603   /* special case that simply copies fill pattern */
604   ISIdentity(isrow,&row_identity); ISIdentity(iscol,&col_identity);
605   if (levels == 0 && row_identity && col_identity) {
606     ierr = MatConvertSameType_SeqAIJ(A,fact,DO_NOT_COPY_VALUES); CHKERRQ(ierr);
607     (*fact)->factor = FACTOR_LU;
608     b               = (Mat_SeqAIJ *) (*fact)->data;
609     if (!b->diag) {
610       ierr = MatMarkDiag_SeqAIJ(*fact); CHKERRQ(ierr);
611     }
612     b->row             = isrow;
613     b->col             = iscol;
614     b->icol            = isicol;
615     b->solve_work      = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
616     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
617     PetscFunctionReturn(0);
618   }
619 
620   ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr);
621   ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
622 
623   /* get new row pointers */
624   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
625   ainew[0] = -shift;
626   /* don't know how many column pointers are needed so estimate */
627   jmax = (int) (f*(ai[n]+!shift));
628   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
629   /* ajfill is level of fill for each fill entry */
630   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill);
631   /* fill is a linked list of nonzeros in active row */
632   fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill);
633   /* im is level for each filled value */
634   im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im);
635   /* dloc is location of diagonal in factor */
636   dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc);
637   dloc[0]  = 0;
638   for ( prow=0; prow<n; prow++ ) {
639     /* first copy previous fill into linked list */
640     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
641     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
642     xi      = aj + ai[r[prow]] + shift;
643     fill[n] = n;
644     while (nz--) {
645       fm  = n;
646       idx = ic[*xi++ + shift];
647       do {
648         m  = fm;
649         fm = fill[m];
650       } while (fm < idx);
651       fill[m]   = idx;
652       fill[idx] = fm;
653       im[idx]   = 0;
654     }
655     nzi = 0;
656     row = fill[n];
657     while ( row < prow ) {
658       incrlev = im[row] + 1;
659       nz      = dloc[row];
660       xi      = ajnew  + ainew[row] + shift + nz;
661       flev    = ajfill + ainew[row] + shift + nz + 1;
662       nnz     = ainew[row+1] - ainew[row] - nz - 1;
663       if (*xi++ + shift != row) {
664         SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot: try running with -pc_ilu_nonzeros_along_diagonal");
665       }
666       fm      = row;
667       while (nnz-- > 0) {
668         idx = *xi++ + shift;
669         if (*flev + incrlev > levels) {
670           flev++;
671           continue;
672         }
673         do {
674           m  = fm;
675           fm = fill[m];
676         } while (fm < idx);
677         if (fm != idx) {
678           im[idx]   = *flev + incrlev;
679           fill[m]   = idx;
680           fill[idx] = fm;
681           fm        = idx;
682           nzf++;
683         }
684         else {
685           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
686         }
687         flev++;
688       }
689       row = fill[row];
690       nzi++;
691     }
692     /* copy new filled row into permanent storage */
693     ainew[prow+1] = ainew[prow] + nzf;
694     if (ainew[prow+1] > jmax-shift) {
695       /* allocate a longer ajnew */
696       int maxadd;
697       maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n);
698       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
699       jmax += maxadd;
700       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
701       PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));
702       PetscFree(ajnew);
703       ajnew = xi;
704       /* allocate a longer ajfill */
705       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
706       PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));
707       PetscFree(ajfill);
708       ajfill = xi;
709       realloc++;
710     }
711     xi          = ajnew + ainew[prow] + shift;
712     flev        = ajfill + ainew[prow] + shift;
713     dloc[prow]  = nzi;
714     fm          = fill[n];
715     while (nzf--) {
716       *xi++   = fm - shift;
717       *flev++ = im[fm];
718       fm      = fill[fm];
719     }
720   }
721   PetscFree(ajfill);
722   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
723   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
724   PetscFree(fill); PetscFree(im);
725 
726   {
727     double af = ((double)ainew[n])/((double)ai[n]);
728     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
729              realloc,f,af);
730     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
731     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
732     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
733   }
734 
735   /* put together the new matrix */
736   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr);
737   b = (Mat_SeqAIJ *) (*fact)->data;
738   PetscFree(b->imax);
739   b->singlemalloc = 0;
740   len = (ainew[n] + shift)*sizeof(Scalar);
741   /* the next line frees the default space generated by the Create() */
742   PetscFree(b->a); PetscFree(b->ilen);
743   b->a          = (Scalar *) PetscMalloc( len+1 ); CHKPTRQ(b->a);
744   b->j          = ajnew;
745   b->i          = ainew;
746   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
747   b->diag       = dloc;
748   b->ilen       = 0;
749   b->imax       = 0;
750   b->row        = isrow;
751   b->col        = iscol;
752   b->icol       = isicol;
753   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar)); CHKPTRQ(b->solve_work);
754   /* In b structure:  Free imax, ilen, old a, old j.
755      Allocate dloc, solve_work, new a, new j */
756   PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar)));
757   b->maxnz          = b->nz = ainew[n] + shift;
758   (*fact)->factor   = FACTOR_LU;
759 
760   (*fact)->info.factor_mallocs    = realloc;
761   (*fact)->info.fill_ratio_given  = f;
762   (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]);
763 
764   PetscFunctionReturn(0);
765 }
766 
767 
768 
769 
770