xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision a5ff213d897f5fcaef42ae26f8ae0a8e003d03e2)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9 {
10   PetscFunctionBegin;
11 
12   SETERRQ(PETSC_ERR_SUP,"Code not written");
13 #if !defined(PETSC_USE_DEBUG)
14   PetscFunctionReturn(0);
15 #endif
16 }
17 
18 
19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
21 
22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
28   /* ------------------------------------------------------------
29 
30           This interface was contribed by Tony Caola
31 
32      This routine is an interface to the pivoting drop-tolerance
33      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34      SPARSEKIT2.
35 
36      The SPARSEKIT2 routines used here are covered by the GNU
37      copyright; see the file gnu in this directory.
38 
39      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40      help in getting this routine ironed out.
41 
42      The major drawback to this routine is that if info->fill is
43      not large enough it fails rather than allocating more space;
44      this can be fixed by hacking/improving the f2c version of
45      Yousef Saad's code.
46 
47      ------------------------------------------------------------
48 */
49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50 {
51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
52   PetscFunctionBegin;
53   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
54   You can obtain the drop tolerance routines by installing PETSc from\n\
55   www.mcs.anl.gov/petsc\n");
56 #else
57   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
58   IS             iscolf,isicol,isirow;
59   PetscTruth     reorder;
60   PetscErrorCode ierr,sierr;
61   PetscInt       *c,*r,*ic,i,n = A->m;
62   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63   PetscInt       *ordcol,*iwk,*iperm,*jw;
64   PetscInt       jmax,lfill,job,*o_i,*o_j;
65   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66   PetscReal      af;
67 
68   PetscFunctionBegin;
69 
70   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
71   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
72   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
73   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
74   lfill   = (PetscInt)(info->dtcount/2.0);
75   jmax    = (PetscInt)(info->fill*a->nz);
76 
77 
78   /* ------------------------------------------------------------
79      If reorder=.TRUE., then the original matrix has to be
80      reordered to reflect the user selected ordering scheme, and
81      then de-reordered so it is in it's original format.
82      Because Saad's dperm() is NOT in place, we have to copy
83      the original matrix and allocate more storage. . .
84      ------------------------------------------------------------
85   */
86 
87   /* set reorder to true if either isrow or iscol is not identity */
88   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
89   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
90   reorder = PetscNot(reorder);
91 
92 
93   /* storage for ilu factor */
94   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
95   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
97   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
98 
99   /* ------------------------------------------------------------
100      Make sure that everything is Fortran formatted (1-Based)
101      ------------------------------------------------------------
102   */
103   for (i=old_i[0];i<old_i[n];i++) {
104     old_j[i]++;
105   }
106   for(i=0;i<n+1;i++) {
107     old_i[i]++;
108   };
109 
110 
111   if (reorder) {
112     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
113     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
114     for(i=0;i<n;i++) {
115       r[i]  = r[i]+1;
116       c[i]  = c[i]+1;
117     }
118     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
119     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
121     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
122     for (i=0;i<n;i++) {
123       r[i]  = r[i]-1;
124       c[i]  = c[i]-1;
125     }
126     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
127     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
128     o_a = old_a2;
129     o_j = old_j2;
130     o_i = old_i2;
131   } else {
132     o_a = old_a;
133     o_j = old_j;
134     o_i = old_i;
135   }
136 
137   /* ------------------------------------------------------------
138      Call Saad's ilutp() routine to generate the factorization
139      ------------------------------------------------------------
140   */
141 
142   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
144   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
145 
146   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
147   if (sierr) {
148     switch (sierr) {
149       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
150       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
152       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
153       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
154       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
155     }
156   }
157 
158   ierr = PetscFree(w);CHKERRQ(ierr);
159   ierr = PetscFree(jw);CHKERRQ(ierr);
160 
161   /* ------------------------------------------------------------
162      Saad's routine gives the result in Modified Sparse Row (msr)
163      Convert to Compressed Sparse Row format (csr)
164      ------------------------------------------------------------
165   */
166 
167   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
168   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
169 
170   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
171 
172   ierr = PetscFree(iwk);CHKERRQ(ierr);
173   ierr = PetscFree(wk);CHKERRQ(ierr);
174 
175   if (reorder) {
176     ierr = PetscFree(old_a2);CHKERRQ(ierr);
177     ierr = PetscFree(old_j2);CHKERRQ(ierr);
178     ierr = PetscFree(old_i2);CHKERRQ(ierr);
179   } else {
180     /* fix permutation of old_j that the factorization introduced */
181     for (i=old_i[0]; i<old_i[n]; i++) {
182       old_j[i-1] = iperm[old_j[i-1]-1];
183     }
184   }
185 
186   /* get rid of the shift to indices starting at 1 */
187   for (i=0; i<n+1; i++) {
188     old_i[i]--;
189   }
190   for (i=old_i[0];i<old_i[n];i++) {
191     old_j[i]--;
192   }
193 
194   /* Make the factored matrix 0-based */
195   for (i=0; i<n+1; i++) {
196     new_i[i]--;
197   }
198   for (i=new_i[0];i<new_i[n];i++) {
199     new_j[i]--;
200   }
201 
202   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203   /*-- permute the right-hand-side and solution vectors           --*/
204   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
205   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
206   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
207   for(i=0; i<n; i++) {
208     ordcol[i] = ic[iperm[i]-1];
209   };
210   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
211   ierr = ISDestroy(isicol);CHKERRQ(ierr);
212 
213   ierr = PetscFree(iperm);CHKERRQ(ierr);
214 
215   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
216   ierr = PetscFree(ordcol);CHKERRQ(ierr);
217 
218   /*----- put together the new matrix -----*/
219 
220   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
221   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
222   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
223   (*fact)->factor    = FACTOR_LU;
224   (*fact)->assembled = PETSC_TRUE;
225 
226   b = (Mat_SeqAIJ*)(*fact)->data;
227   ierr = PetscFree(b->imax);CHKERRQ(ierr);
228   b->sorted        = PETSC_FALSE;
229   b->singlemalloc  = PETSC_FALSE;
230   /* the next line frees the default space generated by the MatCreate() */
231   ierr             = PetscFree(b->a);CHKERRQ(ierr);
232   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
247 
248   /* check out for identical nodes. If found, use inode functions */
249   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
250 
251   af = ((double)b->nz)/((double)a->nz) + .001;
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 /*
262     Factorization code for AIJ format.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267 {
268   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
269   IS             isicol;
270   PetscErrorCode ierr;
271   PetscInt       *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
272   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
274   PetscReal      f;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
284   ainew[0] = 0;
285   /* don't know how many column pointers are needed so estimate */
286   f = info->fill;
287   jmax  = (PetscInt)(f*ai[n]+1);
288   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
289   /* fill is a linked list of nonzeros in active row */
290   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
291   im   = fill + n + 1;
292   /* idnew is location of diagonal in factor */
293   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
294   idnew[0] = 0;
295 
296   for (i=0; i<n; i++) {
297     /* first copy previous fill into linked list */
298     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
299     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300     ajtmp   = aj + ai[r[i]];
301     fill[n] = n;
302     while (nz--) {
303       fm  = n;
304       idx = ic[*ajtmp++];
305       do {
306         m  = fm;
307         fm = fill[m];
308       } while (fm < idx);
309       fill[m]   = idx;
310       fill[idx] = fm;
311     }
312     row = fill[n];
313     while (row < i) {
314       ajtmp = ajnew + idnew[row] + 1;
315       nzbd  = 1 + idnew[row] - ainew[row];
316       nz    = im[row] - nzbd;
317       fm    = row;
318       while (nz-- > 0) {
319         idx = *ajtmp++ ;
320         nzbd++;
321         if (idx == i) im[row] = nzbd;
322         do {
323           m  = fm;
324           fm = fill[m];
325         } while (fm < idx);
326         if (fm != idx) {
327           fill[m]   = idx;
328           fill[idx] = fm;
329           fm        = idx;
330           nnz++;
331         }
332       }
333       row = fill[row];
334     }
335     /* copy new filled row into permanent storage */
336     ainew[i+1] = ainew[i] + nnz;
337     if (ainew[i+1] > jmax) {
338 
339       /* estimate how much additional space we will need */
340       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
341       /* just double the memory each time */
342       PetscInt maxadd = jmax;
343       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
344       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
345       jmax += maxadd;
346 
347       /* allocate a longer ajnew */
348       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
349       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr);
350       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
351       ajnew = ajtmp;
352       reallocs++; /* count how many times we realloc */
353     }
354     ajtmp = ajnew + ainew[i];
355     fm    = fill[n];
356     nzi   = 0;
357     im[i] = nnz;
358     while (nnz--) {
359       if (fm < i) nzi++;
360       *ajtmp++ = fm ;
361       fm       = fill[fm];
362     }
363     idnew[i] = ainew[i] + nzi;
364   }
365   if (ai[n] != 0) {
366     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
367     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
368     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
369     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
370     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
371   } else {
372     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
373   }
374 
375   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
376   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
377 
378   ierr = PetscFree(fill);CHKERRQ(ierr);
379 
380   /* put together the new matrix */
381   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
382   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
383   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
384   PetscLogObjectParent(*B,isicol);
385   b    = (Mat_SeqAIJ*)(*B)->data;
386   ierr = PetscFree(b->imax);CHKERRQ(ierr);
387   b->singlemalloc = PETSC_FALSE;
388   /* the next line frees the default space generated by the Create() */
389   ierr = PetscFree(b->a);CHKERRQ(ierr);
390   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
391   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
392   b->j          = ajnew;
393   b->i          = ainew;
394   b->diag       = idnew;
395   b->ilen       = 0;
396   b->imax       = 0;
397   b->row        = isrow;
398   b->col        = iscol;
399   b->lu_damping        = info->damping;
400   b->lu_zeropivot      = info->zeropivot;
401   b->lu_shift          = info->shift;
402   b->lu_shift_fraction = info->shift_fraction;
403   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
404   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
405   b->icol       = isicol;
406   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
407   /* In b structure:  Free imax, ilen, old a, old j.
408      Allocate idnew, solve_work, new a, new j */
409   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
410   b->maxnz = b->nz = ainew[n] ;
411 
412   (*B)->factor                 =  FACTOR_LU;
413   (*B)->info.factor_mallocs    = reallocs;
414   (*B)->info.fill_ratio_given  = f;
415   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
416   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
417 
418   if (ai[n] != 0) {
419     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
420   } else {
421     (*B)->info.fill_ratio_needed = 0.0;
422   }
423   PetscFunctionReturn(0);
424 }
425 /* ----------------------------------------------------------- */
426 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
430 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
431 {
432   Mat            C=*B;
433   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
434   IS             isrow = b->row,isicol = b->icol;
435   PetscErrorCode ierr;
436   PetscInt       *r,*ic,i,j,n=A->m,*ai=b->i,*aj=b->j;
437   PetscInt       *ajtmpold,*ajtmp,nz,row,*ics;
438   PetscInt       *diag_offset = b->diag,diag,*pj,nshift=0;
439   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
440   PetscReal      zeropivot=b->lu_zeropivot,rs,d;
441   PetscReal      row_shift,shift_fraction,shift_amount,shift_lo=0.,shift_hi=1.,shift_top=0.;
442   PetscTruth     lushift;
443   PetscReal      shift_pd=b->lu_shift,shift_nonzero=b->lu_damping;
444 
445   PetscFunctionBegin;
446   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
447   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
448   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
449   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
450   rtmps = rtmp; ics = ic;
451 
452   if (!a->diag) {
453     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
454   }
455   /* if both shift schemes are chosen by user, only use shift_pd */
456   if (shift_pd && shift_nonzero) shift_nonzero = 0.0;
457   if (shift_pd) { /* set shift_top=max{row_shift} */
458     PetscInt *aai = a->i,*ddiag = a->diag;
459     shift_top = 0;
460     for (i=0; i<n; i++) {
461       d = PetscAbsScalar((a->a)[ddiag[i]]);
462       /* calculate amt of shift needed for this row */
463       if (d<=0) {
464         row_shift = 0;
465       } else {
466         row_shift = -2*d;
467       }
468       v  = a->a+aai[i];
469       nz = aai[i+1] - aai[i];
470       for (j=0; j<nz; j++)
471 	row_shift += PetscAbsScalar(v[j]);
472       if (row_shift>shift_top) shift_top = row_shift;
473     }
474   }
475 
476   shift_fraction = b->lu_shift_fraction;
477   shift_amount   = 0;
478   do {
479     lushift = PETSC_FALSE;
480     for (i=0; i<n; i++){
481       nz    = ai[i+1] - ai[i];
482       ajtmp = aj + ai[i];
483       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
484 
485       /* load in initial (unfactored row) */
486       nz       = a->i[r[i]+1] - a->i[r[i]];
487       ajtmpold = a->j + a->i[r[i]];
488       v        = a->a + a->i[r[i]];
489       for (j=0; j<nz; j++) {
490         rtmp[ics[ajtmpold[j]]] = v[j];
491       }
492       rtmp[ics[r[i]]] += shift_amount; /* shift the diagonal of the matrix */
493 
494       row = *ajtmp++;
495       while  (row < i) {
496         pc = rtmp + row;
497         if (*pc != 0.0) {
498           pv         = b->a + diag_offset[row];
499           pj         = b->j + diag_offset[row] + 1;
500           multiplier = *pc / *pv++;
501           *pc        = multiplier;
502           nz         = ai[row+1] - diag_offset[row] - 1;
503           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
504           PetscLogFlops(2*nz);
505         }
506         row = *ajtmp++;
507       }
508       /* finished row so stick it into b->a */
509       pv   = b->a + ai[i] ;
510       pj   = b->j + ai[i] ;
511       nz   = ai[i+1] - ai[i];
512       diag = diag_offset[i] - ai[i];
513       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
514       rs   = 0.0;
515       for (j=0; j<nz; j++) {
516         pv[j] = rtmps[pj[j]];
517         if (j != diag) rs += PetscAbsScalar(pv[j]);
518       }
519       /* shift the diagonals when zero pivot is detected */
520 #define MAX_NSHIFT 5
521       if (PetscAbsScalar(pv[diag]) <= zeropivot*rs && shift_nonzero){
522         /* force |diag(*B)| > zeropivot*rs */
523         if (!nshift){
524           shift_amount = shift_nonzero;
525         } else {
526           shift_amount *= 2.0;
527         }
528         lushift = PETSC_TRUE;
529         nshift++;
530         break;
531       } else if (PetscRealPart(pv[diag]) <= zeropivot*rs && shift_pd){
532         /* force *B to be diagonally dominant */
533 	if (nshift>MAX_NSHIFT) {
534 	  SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner");
535 	} else if (nshift==MAX_NSHIFT) {
536 	  shift_fraction = shift_hi;
537 	  lushift        = PETSC_FALSE;
538 	} else {
539 	  shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.;
540 	  lushift  = PETSC_TRUE;
541 	}
542 	shift_amount = shift_fraction * shift_top;
543         nshift++;
544         break;
545       } else if (PetscAbsScalar(pv[diag]) <= zeropivot*rs){
546         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
547       }
548     }
549     if (shift_pd && !lushift && shift_fraction>0 && nshift<MAX_NSHIFT) {
550       /*
551        * if no shift in this attempt & shifting & started shifting & can refine,
552        * then try lower shift
553        */
554       shift_hi       = shift_fraction;
555       shift_fraction = (shift_hi+shift_lo)/2.;
556       shift_amount   = shift_fraction * shift_top;
557       lushift        = PETSC_TRUE;
558       nshift++;
559     }
560   } while (lushift);
561 
562   /* invert diagonal entries for simplier triangular solves */
563   for (i=0; i<n; i++) {
564     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
565   }
566 
567   ierr = PetscFree(rtmp);CHKERRQ(ierr);
568   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
569   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
570   C->factor = FACTOR_LU;
571   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
572   C->assembled = PETSC_TRUE;
573   PetscLogFlops(C->n);
574   if (nshift){
575     if (shift_nonzero) {
576       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D, shift_ amount%g\n",nshift,shift_amount);
577     } else if (shift_pd) {
578       b->lu_shift_fraction = shift_fraction;
579       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift);
580     }
581   }
582   PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
587 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
588 {
589   PetscFunctionBegin;
590   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
591   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
592   PetscFunctionReturn(0);
593 }
594 
595 
596 /* ----------------------------------------------------------- */
597 #undef __FUNCT__
598 #define __FUNCT__ "MatLUFactor_SeqAIJ"
599 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
600 {
601   PetscErrorCode ierr;
602   Mat            C;
603 
604   PetscFunctionBegin;
605   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
606   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
607   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
608   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
609   PetscFunctionReturn(0);
610 }
611 /* ----------------------------------------------------------- */
612 #undef __FUNCT__
613 #define __FUNCT__ "MatSolve_SeqAIJ"
614 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
615 {
616   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
617   IS             iscol = a->col,isrow = a->row;
618   PetscErrorCode ierr;
619   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
620   PetscInt       nz,*rout,*cout;
621   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
622 
623   PetscFunctionBegin;
624   if (!n) PetscFunctionReturn(0);
625 
626   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
627   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
628   tmp  = a->solve_work;
629 
630   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
631   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
632 
633   /* forward solve the lower triangular */
634   tmp[0] = b[*r++];
635   tmps   = tmp;
636   for (i=1; i<n; i++) {
637     v   = aa + ai[i] ;
638     vi  = aj + ai[i] ;
639     nz  = a->diag[i] - ai[i];
640     sum = b[*r++];
641     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
642     tmp[i] = sum;
643   }
644 
645   /* backward solve the upper triangular */
646   for (i=n-1; i>=0; i--){
647     v   = aa + a->diag[i] + 1;
648     vi  = aj + a->diag[i] + 1;
649     nz  = ai[i+1] - a->diag[i] - 1;
650     sum = tmp[i];
651     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
652     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
653   }
654 
655   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
656   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
657   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
658   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
659   PetscLogFlops(2*a->nz - A->n);
660   PetscFunctionReturn(0);
661 }
662 
663 /* ----------------------------------------------------------- */
664 #undef __FUNCT__
665 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
666 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
667 {
668   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
669   PetscErrorCode ierr;
670   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
671   PetscScalar    *x,*b,*aa = a->a;
672 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
673   PetscInt       adiag_i,i,*vi,nz,ai_i;
674   PetscScalar    *v,sum;
675 #endif
676 
677   PetscFunctionBegin;
678   if (!n) PetscFunctionReturn(0);
679 
680   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
681   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
682 
683 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
684   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
685 #else
686   /* forward solve the lower triangular */
687   x[0] = b[0];
688   for (i=1; i<n; i++) {
689     ai_i = ai[i];
690     v    = aa + ai_i;
691     vi   = aj + ai_i;
692     nz   = adiag[i] - ai_i;
693     sum  = b[i];
694     while (nz--) sum -= *v++ * x[*vi++];
695     x[i] = sum;
696   }
697 
698   /* backward solve the upper triangular */
699   for (i=n-1; i>=0; i--){
700     adiag_i = adiag[i];
701     v       = aa + adiag_i + 1;
702     vi      = aj + adiag_i + 1;
703     nz      = ai[i+1] - adiag_i - 1;
704     sum     = x[i];
705     while (nz--) sum -= *v++ * x[*vi++];
706     x[i]    = sum*aa[adiag_i];
707   }
708 #endif
709   PetscLogFlops(2*a->nz - A->n);
710   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
711   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
717 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
718 {
719   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
720   IS             iscol = a->col,isrow = a->row;
721   PetscErrorCode ierr;
722   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
723   PetscInt       nz,*rout,*cout;
724   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
725 
726   PetscFunctionBegin;
727   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
728 
729   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
730   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
731   tmp  = a->solve_work;
732 
733   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
734   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
735 
736   /* forward solve the lower triangular */
737   tmp[0] = b[*r++];
738   for (i=1; i<n; i++) {
739     v   = aa + ai[i] ;
740     vi  = aj + ai[i] ;
741     nz  = a->diag[i] - ai[i];
742     sum = b[*r++];
743     while (nz--) sum -= *v++ * tmp[*vi++ ];
744     tmp[i] = sum;
745   }
746 
747   /* backward solve the upper triangular */
748   for (i=n-1; i>=0; i--){
749     v   = aa + a->diag[i] + 1;
750     vi  = aj + a->diag[i] + 1;
751     nz  = ai[i+1] - a->diag[i] - 1;
752     sum = tmp[i];
753     while (nz--) sum -= *v++ * tmp[*vi++ ];
754     tmp[i] = sum*aa[a->diag[i]];
755     x[*c--] += tmp[i];
756   }
757 
758   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
759   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
760   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
761   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
762   PetscLogFlops(2*a->nz);
763 
764   PetscFunctionReturn(0);
765 }
766 /* -------------------------------------------------------------------*/
767 #undef __FUNCT__
768 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
769 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
770 {
771   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
772   IS             iscol = a->col,isrow = a->row;
773   PetscErrorCode ierr;
774   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
775   PetscInt       nz,*rout,*cout,*diag = a->diag;
776   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
777 
778   PetscFunctionBegin;
779   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
780   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
781   tmp  = a->solve_work;
782 
783   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
784   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
785 
786   /* copy the b into temp work space according to permutation */
787   for (i=0; i<n; i++) tmp[i] = b[c[i]];
788 
789   /* forward solve the U^T */
790   for (i=0; i<n; i++) {
791     v   = aa + diag[i] ;
792     vi  = aj + diag[i] + 1;
793     nz  = ai[i+1] - diag[i] - 1;
794     s1  = tmp[i];
795     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
796     while (nz--) {
797       tmp[*vi++ ] -= (*v++)*s1;
798     }
799     tmp[i] = s1;
800   }
801 
802   /* backward solve the L^T */
803   for (i=n-1; i>=0; i--){
804     v   = aa + diag[i] - 1 ;
805     vi  = aj + diag[i] - 1 ;
806     nz  = diag[i] - ai[i];
807     s1  = tmp[i];
808     while (nz--) {
809       tmp[*vi-- ] -= (*v--)*s1;
810     }
811   }
812 
813   /* copy tmp into x according to permutation */
814   for (i=0; i<n; i++) x[r[i]] = tmp[i];
815 
816   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
817   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
818   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
819   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
820 
821   PetscLogFlops(2*a->nz-A->n);
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
827 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
828 {
829   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
830   IS             iscol = a->col,isrow = a->row;
831   PetscErrorCode ierr;
832   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
833   PetscInt       nz,*rout,*cout,*diag = a->diag;
834   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
835 
836   PetscFunctionBegin;
837   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
838 
839   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
840   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
841   tmp = a->solve_work;
842 
843   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
844   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
845 
846   /* copy the b into temp work space according to permutation */
847   for (i=0; i<n; i++) tmp[i] = b[c[i]];
848 
849   /* forward solve the U^T */
850   for (i=0; i<n; i++) {
851     v   = aa + diag[i] ;
852     vi  = aj + diag[i] + 1;
853     nz  = ai[i+1] - diag[i] - 1;
854     tmp[i] *= *v++;
855     while (nz--) {
856       tmp[*vi++ ] -= (*v++)*tmp[i];
857     }
858   }
859 
860   /* backward solve the L^T */
861   for (i=n-1; i>=0; i--){
862     v   = aa + diag[i] - 1 ;
863     vi  = aj + diag[i] - 1 ;
864     nz  = diag[i] - ai[i];
865     while (nz--) {
866       tmp[*vi-- ] -= (*v--)*tmp[i];
867     }
868   }
869 
870   /* copy tmp into x according to permutation */
871   for (i=0; i<n; i++) x[r[i]] += tmp[i];
872 
873   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
874   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
875   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
876   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
877 
878   PetscLogFlops(2*a->nz);
879   PetscFunctionReturn(0);
880 }
881 /* ----------------------------------------------------------------*/
882 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
883 
884 #undef __FUNCT__
885 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
886 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
887 {
888   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
889   IS             isicol;
890   PetscErrorCode ierr;
891   PetscInt       *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
892   PetscInt       *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
893   PetscInt       *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
894   PetscInt       incrlev,nnz,i,levels,diagonal_fill;
895   PetscTruth     col_identity,row_identity;
896   PetscReal      f;
897 
898   PetscFunctionBegin;
899   f             = info->fill;
900   levels        = (PetscInt)info->levels;
901   diagonal_fill = (PetscInt)info->diagonal_fill;
902   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
903 
904   /* special case that simply copies fill pattern */
905   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
906   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
907   if (!levels && row_identity && col_identity) {
908     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
909     (*fact)->factor = FACTOR_LU;
910     b               = (Mat_SeqAIJ*)(*fact)->data;
911     if (!b->diag) {
912       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
913     }
914     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
915     b->row              = isrow;
916     b->col              = iscol;
917     b->icol             = isicol;
918     b->lu_damping       = info->damping;
919     b->lu_zeropivot     = info->zeropivot;
920     b->lu_shift         = info->shift;
921     b->lu_shift_fraction= info->shift_fraction;
922     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
923     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
924     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
925     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
926     PetscFunctionReturn(0);
927   }
928 
929   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
930   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
931 
932   /* get new row pointers */
933   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
934   ainew[0] = 0;
935   /* don't know how many column pointers are needed so estimate */
936   jmax = (PetscInt)(f*(ai[n]+1));
937   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
938   /* ajfill is level of fill for each fill entry */
939   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr);
940   /* fill is a linked list of nonzeros in active row */
941   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
942   /* im is level for each filled value */
943   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
944   /* dloc is location of diagonal in factor */
945   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr);
946   dloc[0]  = 0;
947   for (prow=0; prow<n; prow++) {
948 
949     /* copy current row into linked list */
950     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
951     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
952     xi      = aj + ai[r[prow]];
953     fill[n] = n;
954     fill[prow] = -1; /* marker to indicate if diagonal exists */
955     while (nz--) {
956       fm  = n;
957       idx = ic[*xi++ ];
958       do {
959         m  = fm;
960         fm = fill[m];
961       } while (fm < idx);
962       fill[m]   = idx;
963       fill[idx] = fm;
964       im[idx]   = 0;
965     }
966 
967     /* make sure diagonal entry is included */
968     if (diagonal_fill && fill[prow] == -1) {
969       fm = n;
970       while (fill[fm] < prow) fm = fill[fm];
971       fill[prow] = fill[fm]; /* insert diagonal into linked list */
972       fill[fm]   = prow;
973       im[prow]   = 0;
974       nzf++;
975       dcount++;
976     }
977 
978     nzi = 0;
979     row = fill[n];
980     while (row < prow) {
981       incrlev = im[row] + 1;
982       nz      = dloc[row];
983       xi      = ajnew  + ainew[row]  + nz + 1;
984       flev    = ajfill + ainew[row]  + nz + 1;
985       nnz     = ainew[row+1] - ainew[row] - nz - 1;
986       fm      = row;
987       while (nnz-- > 0) {
988         idx = *xi++ ;
989         if (*flev + incrlev > levels) {
990           flev++;
991           continue;
992         }
993         do {
994           m  = fm;
995           fm = fill[m];
996         } while (fm < idx);
997         if (fm != idx) {
998           im[idx]   = *flev + incrlev;
999           fill[m]   = idx;
1000           fill[idx] = fm;
1001           fm        = idx;
1002           nzf++;
1003         } else {
1004           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
1005         }
1006         flev++;
1007       }
1008       row = fill[row];
1009       nzi++;
1010     }
1011     /* copy new filled row into permanent storage */
1012     ainew[prow+1] = ainew[prow] + nzf;
1013     if (ainew[prow+1] > jmax) {
1014 
1015       /* estimate how much additional space we will need */
1016       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1017       /* just double the memory each time */
1018       /*  maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1019       PetscInt maxadd = jmax;
1020       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1021       jmax += maxadd;
1022 
1023       /* allocate a longer ajnew and ajfill */
1024       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1025       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1026       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1027       ajnew  = xi;
1028       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1029       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1030       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1031       ajfill = xi;
1032       reallocs++; /* count how many times we realloc */
1033     }
1034     xi          = ajnew + ainew[prow] ;
1035     flev        = ajfill + ainew[prow] ;
1036     dloc[prow]  = nzi;
1037     fm          = fill[n];
1038     while (nzf--) {
1039       *xi++   = fm ;
1040       *flev++ = im[fm];
1041       fm      = fill[fm];
1042     }
1043     /* make sure row has diagonal entry */
1044     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1045       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1046     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1047     }
1048   }
1049   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1050   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1051   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1052   ierr = PetscFree(fill);CHKERRQ(ierr);
1053   ierr = PetscFree(im);CHKERRQ(ierr);
1054 
1055   {
1056     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1057     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1058     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1059     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1060     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1061     if (diagonal_fill) {
1062       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1063     }
1064   }
1065 
1066   /* put together the new matrix */
1067   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1068   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1069   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1070   PetscLogObjectParent(*fact,isicol);
1071   b = (Mat_SeqAIJ*)(*fact)->data;
1072   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1073   b->singlemalloc = PETSC_FALSE;
1074   len = (ainew[n] )*sizeof(PetscScalar);
1075   /* the next line frees the default space generated by the Create() */
1076   ierr = PetscFree(b->a);CHKERRQ(ierr);
1077   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1078   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1079   b->j          = ajnew;
1080   b->i          = ainew;
1081   for (i=0; i<n; i++) dloc[i] += ainew[i];
1082   b->diag       = dloc;
1083   b->ilen       = 0;
1084   b->imax       = 0;
1085   b->row        = isrow;
1086   b->col        = iscol;
1087   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1088   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1089   b->icol       = isicol;
1090   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1091   /* In b structure:  Free imax, ilen, old a, old j.
1092      Allocate dloc, solve_work, new a, new j */
1093   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1094   b->maxnz             = b->nz = ainew[n] ;
1095   b->lu_damping        = info->damping;
1096   b->lu_shift          = info->shift;
1097   b->lu_shift_fraction = info->shift_fraction;
1098   b->lu_zeropivot = info->zeropivot;
1099   (*fact)->factor = FACTOR_LU;
1100   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1101   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1102 
1103   (*fact)->info.factor_mallocs    = reallocs;
1104   (*fact)->info.fill_ratio_given  = f;
1105   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #include "src/mat/impls/sbaij/seq/sbaij.h"
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1112 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1113 {
1114   Mat            C = *B;
1115   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1116   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1117   IS             ip=b->row;
1118   PetscErrorCode ierr;
1119   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1120   PetscInt       *ai=a->i,*aj=a->j;
1121   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1122   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1123   PetscReal      zeropivot=b->factor_zeropivot,shift_amount,rs;
1124   PetscTruth     chshift;
1125   PetscInt       nshift=0;
1126   PetscReal      shift_pd=b->factor_shift,shift_nonzero=b->factor_damping;
1127 
1128   PetscFunctionBegin;
1129   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1130 
1131   /* initialization */
1132   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1133   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1134   jl   = il + mbs;
1135   rtmp = (MatScalar*)(jl + mbs);
1136 
1137   shift_amount = 0;
1138   do {
1139     chshift = PETSC_FALSE;
1140     for (i=0; i<mbs; i++) {
1141       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1142     }
1143 
1144     for (k = 0; k<mbs; k++){
1145       bval = ba + bi[k];
1146       /* initialize k-th row by the perm[k]-th row of A */
1147       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1148       for (j = jmin; j < jmax; j++){
1149         col = rip[aj[j]];
1150         if (col >= k){ /* only take upper triangular entry */
1151           rtmp[col] = aa[j];
1152           *bval++  = 0.0; /* for in-place factorization */
1153         }
1154       }
1155       /* shift the diagonal of the matrix */
1156       if (nshift) rtmp[k] += shift_amount;
1157 
1158       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1159       dk = rtmp[k];
1160       i = jl[k]; /* first row to be added to k_th row  */
1161 
1162       while (i < k){
1163         nexti = jl[i]; /* next row to be added to k_th row */
1164 
1165         /* compute multiplier, update diag(k) and U(i,k) */
1166         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1167         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1168         dk += uikdi*ba[ili];
1169         ba[ili] = uikdi; /* -U(i,k) */
1170 
1171         /* add multiple of row i to k-th row */
1172         jmin = ili + 1; jmax = bi[i+1];
1173         if (jmin < jmax){
1174           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1175           /* update il and jl for row i */
1176           il[i] = jmin;
1177           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1178         }
1179         i = nexti;
1180       }
1181 
1182       /* shift the diagonals when zero pivot is detected */
1183       /* compute rs=sum of abs(off-diagonal) */
1184       rs   = 0.0;
1185       jmin = bi[k]+1;
1186       nz   = bi[k+1] - jmin;
1187       if (nz){
1188         bcol = bj + jmin;
1189         while (nz--){
1190           rs += PetscAbsScalar(rtmp[*bcol++]);
1191         }
1192       }
1193       if (PetscAbsScalar(dk) <= zeropivot*rs && shift_nonzero){
1194         /* force |diag(*B)| > zeropivot*rs */
1195         if (!nshift){
1196           shift_amount = shift_nonzero;
1197         } else {
1198           shift_amount *= 2.0;
1199         }
1200         chshift = PETSC_TRUE;
1201         nshift++;
1202         break;
1203       } else if (PetscRealPart(dk) <= zeropivot*rs && shift_pd){
1204         /* calculate a shift that would make this row diagonally dominant */
1205 	shift_amount = PetscMax(rs+PetscAbs(PetscRealPart(dk)),1.1*shift_amount);
1206 	chshift      = PETSC_TRUE;
1207 	/* Unlike in the ILU case there is no exit condition on nshift:
1208 	   we increase the shift until it converges. There is no guarantee that
1209 	   this algorithm converges faster or slower, or is better or worse
1210 	   than the ILU algorithm. */
1211 	nshift++;
1212 	break;
1213       } else if (PetscAbsScalar(dk) <= zeropivot*rs){
1214         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1215       }
1216 
1217       /* copy data into U(k,:) */
1218       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1219       jmin = bi[k]+1; jmax = bi[k+1];
1220       if (jmin < jmax) {
1221         for (j=jmin; j<jmax; j++){
1222           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1223         }
1224         /* add the k-th row into il and jl */
1225         il[k] = jmin;
1226         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1227       }
1228     }
1229   } while (chshift);
1230   ierr = PetscFree(il);CHKERRQ(ierr);
1231 
1232   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1233   C->factor       = FACTOR_CHOLESKY;
1234   C->assembled    = PETSC_TRUE;
1235   C->preallocated = PETSC_TRUE;
1236   PetscLogFlops(C->m);
1237   if (nshift){
1238     if (shift_nonzero) {
1239       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount);
1240     } else if (shift_pd) {
1241       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g\n",nshift,shift_amount);
1242     }
1243   }
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 #include "petscbt.h"
1248 #include "src/mat/utils/freespace.h"
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1251 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1252 {
1253   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1254   Mat_SeqSBAIJ   *b;
1255   Mat            B;
1256   PetscErrorCode ierr;
1257   PetscTruth     perm_identity;
1258   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1259   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1260   PetscInt       nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1261   PetscReal      fill=info->fill,levels=info->levels;
1262   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1263   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1264   PetscBT        lnkbt;
1265 
1266   PetscFunctionBegin;
1267   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1268   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1269 
1270   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1271   ui[0] = 0;
1272 
1273   /* special case that simply copies fill pattern */
1274   if (!levels && perm_identity) {
1275     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1276     for (i=0; i<am; i++) {
1277       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1278     }
1279     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1280     cols = uj;
1281     for (i=0; i<am; i++) {
1282       aj    = a->j + a->diag[i];
1283       ncols = ui[i+1] - ui[i];
1284       for (j=0; j<ncols; j++) *cols++ = *aj++;
1285     }
1286   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1287     /* initialization */
1288     ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
1289 
1290     /* jl: linked list for storing indices of the pivot rows
1291        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1292     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1293     il         = jl + am;
1294     uj_ptr     = (PetscInt**)(il + am);
1295     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1296     for (i=0; i<am; i++){
1297       jl[i] = am; il[i] = 0;
1298     }
1299 
1300     /* create and initialize a linked list for storing column indices of the active row k */
1301     nlnk = am + 1;
1302     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1303 
1304     /* initial FreeSpace size is fill*(ai[am]+1) */
1305     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1306     current_space = free_space;
1307     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1308     current_space_lvl = free_space_lvl;
1309 
1310     for (k=0; k<am; k++){  /* for each active row k */
1311       /* initialize lnk by the column indices of row rip[k] of A */
1312       nzk   = 0;
1313       ncols = ai[rip[k]+1] - ai[rip[k]];
1314       ncols_upper = 0;
1315       cols        = cols_lvl + am;
1316       for (j=0; j<ncols; j++){
1317         i = rip[*(aj + ai[rip[k]] + j)];
1318         if (i >= k){ /* only take upper triangular entry */
1319           cols[ncols_upper] = i;
1320           cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
1321           ncols_upper++;
1322         }
1323       }
1324       ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1325       nzk += nlnk;
1326 
1327       /* update lnk by computing fill-in for each pivot row to be merged in */
1328       prow = jl[k]; /* 1st pivot row */
1329 
1330       while (prow < k){
1331         nextprow = jl[prow];
1332 
1333         /* merge prow into k-th row */
1334         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1335         jmax = ui[prow+1];
1336         ncols = jmax-jmin;
1337         i     = jmin - ui[prow];
1338         cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1339         for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1340         ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1341         nzk += nlnk;
1342 
1343         /* update il and jl for prow */
1344         if (jmin < jmax){
1345           il[prow] = jmin;
1346           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1347         }
1348         prow = nextprow;
1349       }
1350 
1351       /* if free space is not available, make more free space */
1352       if (current_space->local_remaining<nzk) {
1353         i = am - k + 1; /* num of unfactored rows */
1354         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1355         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1356         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1357         reallocs++;
1358       }
1359 
1360       /* copy data into free_space and free_space_lvl, then initialize lnk */
1361       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1362 
1363       /* add the k-th row into il and jl */
1364       if (nzk > 1){
1365         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1366         jl[k] = jl[i]; jl[i] = k;
1367         il[k] = ui[k] + 1;
1368       }
1369       uj_ptr[k]     = current_space->array;
1370       uj_lvl_ptr[k] = current_space_lvl->array;
1371 
1372       current_space->array           += nzk;
1373       current_space->local_used      += nzk;
1374       current_space->local_remaining -= nzk;
1375 
1376       current_space_lvl->array           += nzk;
1377       current_space_lvl->local_used      += nzk;
1378       current_space_lvl->local_remaining -= nzk;
1379 
1380       ui[k+1] = ui[k] + nzk;
1381     }
1382 
1383     if (ai[am] != 0) {
1384       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1385       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1386       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1387       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1388     } else {
1389       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1390     }
1391 
1392     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1393     ierr = PetscFree(jl);CHKERRQ(ierr);
1394     ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
1395 
1396     /* destroy list of free space and other temporary array(s) */
1397     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1398     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1399     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1400     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1401 
1402   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1403 
1404   /* put together the new matrix in MATSEQSBAIJ format */
1405   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1406   B = *fact;
1407   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1408   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1409 
1410   b    = (Mat_SeqSBAIJ*)B->data;
1411   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1412   b->singlemalloc = PETSC_FALSE;
1413   /* the next line frees the default space generated by the Create() */
1414   ierr = PetscFree(b->a);CHKERRQ(ierr);
1415   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1416   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1417   b->j    = uj;
1418   b->i    = ui;
1419   b->diag = 0;
1420   b->ilen = 0;
1421   b->imax = 0;
1422   b->row  = perm;
1423   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1424   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1425   b->icol = perm;
1426   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1427   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1428   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1429   b->maxnz = b->nz = ui[am];
1430 
1431   B->factor                 = FACTOR_CHOLESKY;
1432   B->info.factor_mallocs    = reallocs;
1433   B->info.fill_ratio_given  = fill;
1434   if (ai[am] != 0) {
1435     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1436   } else {
1437     B->info.fill_ratio_needed = 0.0;
1438   }
1439 
1440   b->factor_zeropivot      = info->zeropivot;
1441   b->factor_damping        = info->damping;
1442   b->factor_shift          = info->shift;
1443   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1444   if (perm_identity){
1445     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1446     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1447   }
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1453 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1454 {
1455   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1456   Mat_SeqSBAIJ   *b;
1457   Mat            B;
1458   PetscErrorCode ierr;
1459   PetscTruth     perm_identity;
1460   PetscReal      fill = info->fill;
1461   PetscInt       *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1462   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1463   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1464   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1465   PetscBT        lnkbt;
1466   IS             iperm;
1467 
1468   PetscFunctionBegin;
1469   /* check whether perm is the identity mapping */
1470   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1471   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1472 
1473   if (!perm_identity){
1474     /* check if perm is symmetric! */
1475     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1476     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1477     for (i=0; i<mbs; i++) {
1478       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1479     }
1480     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1481     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1482   }
1483 
1484   /* initialization */
1485   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1486   ui[0] = 0;
1487 
1488   /* jl: linked list for storing indices of the pivot rows
1489      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
1490   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1491   il     = jl + mbs;
1492   cols   = il + mbs;
1493   ui_ptr = (PetscInt**)(cols + mbs);
1494   for (i=0; i<mbs; i++){
1495     jl[i] = mbs; il[i] = 0;
1496   }
1497 
1498   /* create and initialize a linked list for storing column indices of the active row k */
1499   nlnk = mbs + 1;
1500   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1501 
1502   /* initial FreeSpace size is fill*(ai[mbs]+1) */
1503   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
1504   current_space = free_space;
1505 
1506   for (k=0; k<mbs; k++){  /* for each active row k */
1507     /* initialize lnk by the column indices of row rip[k] of A */
1508     nzk   = 0;
1509     ncols = ai[rip[k]+1] - ai[rip[k]];
1510     ncols_upper = 0;
1511     for (j=0; j<ncols; j++){
1512       i = rip[*(aj + ai[rip[k]] + j)];
1513       if (i >= k){ /* only take upper triangular entry */
1514         cols[ncols_upper] = i;
1515         ncols_upper++;
1516       }
1517     }
1518     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1519     nzk += nlnk;
1520 
1521     /* update lnk by computing fill-in for each pivot row to be merged in */
1522     prow = jl[k]; /* 1st pivot row */
1523 
1524     while (prow < k){
1525       nextprow = jl[prow];
1526       /* merge prow into k-th row */
1527       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
1528       jmax = ui[prow+1];
1529       ncols = jmax-jmin;
1530       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
1531       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1532       nzk += nlnk;
1533 
1534       /* update il and jl for prow */
1535       if (jmin < jmax){
1536         il[prow] = jmin;
1537         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1538       }
1539       prow = nextprow;
1540     }
1541 
1542     /* if free space is not available, make more free space */
1543     if (current_space->local_remaining<nzk) {
1544       i = mbs - k + 1; /* num of unfactored rows */
1545       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1546       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1547       reallocs++;
1548     }
1549 
1550     /* copy data into free space, then initialize lnk */
1551     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1552 
1553     /* add the k-th row into il and jl */
1554     if (nzk-1 > 0){
1555       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1556       jl[k] = jl[i]; jl[i] = k;
1557       il[k] = ui[k] + 1;
1558     }
1559     ui_ptr[k] = current_space->array;
1560     current_space->array           += nzk;
1561     current_space->local_used      += nzk;
1562     current_space->local_remaining -= nzk;
1563 
1564     ui[k+1] = ui[k] + nzk;
1565   }
1566 
1567   if (ai[mbs] != 0) {
1568     PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]);
1569     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1570     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1571     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1572   } else {
1573      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1574   }
1575 
1576   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1577   ierr = PetscFree(jl);CHKERRQ(ierr);
1578 
1579   /* destroy list of free space and other temporary array(s) */
1580   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1581   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1582   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1583 
1584   /* put together the new matrix in MATSEQSBAIJ format */
1585   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
1586   B    = *fact;
1587   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1588   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1589 
1590   b = (Mat_SeqSBAIJ*)B->data;
1591   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1592   b->singlemalloc = PETSC_FALSE;
1593   /* the next line frees the default space generated by the Create() */
1594   ierr = PetscFree(b->a);CHKERRQ(ierr);
1595   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1596   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1597   b->j    = uj;
1598   b->i    = ui;
1599   b->diag = 0;
1600   b->ilen = 0;
1601   b->imax = 0;
1602   b->row  = perm;
1603   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1604   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1605   b->icol = perm;
1606   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1607   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1608   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
1609   b->maxnz = b->nz = ui[mbs];
1610 
1611   B->factor                 = FACTOR_CHOLESKY;
1612   B->info.factor_mallocs    = reallocs;
1613   B->info.fill_ratio_given  = fill;
1614   if (ai[mbs] != 0) {
1615     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
1616   } else {
1617     B->info.fill_ratio_needed = 0.0;
1618   }
1619   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1620   if (perm_identity){
1621     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1622     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1623   }
1624   PetscFunctionReturn(0);
1625 }
1626