xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision e24cfe645c937aecbdecb5c9899a85396cdbd15f)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9 {
10   PetscFunctionBegin;
11 
12   SETERRQ(PETSC_ERR_SUP,"Code not written");
13 #if !defined(PETSC_USE_DEBUG)
14   PetscFunctionReturn(0);
15 #endif
16 }
17 
18 
19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
21 
22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
28   /* ------------------------------------------------------------
29 
30           This interface was contribed by Tony Caola
31 
32      This routine is an interface to the pivoting drop-tolerance
33      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34      SPARSEKIT2.
35 
36      The SPARSEKIT2 routines used here are covered by the GNU
37      copyright; see the file gnu in this directory.
38 
39      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40      help in getting this routine ironed out.
41 
42      The major drawback to this routine is that if info->fill is
43      not large enough it fails rather than allocating more space;
44      this can be fixed by hacking/improving the f2c version of
45      Yousef Saad's code.
46 
47      ------------------------------------------------------------
48 */
49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50 {
51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
52   PetscFunctionBegin;
53   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
54   You can obtain the drop tolerance routines by installing PETSc from\n\
55   www.mcs.anl.gov/petsc\n");
56 #else
57   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
58   IS             iscolf,isicol,isirow;
59   PetscTruth     reorder;
60   PetscErrorCode ierr,sierr;
61   PetscInt       *c,*r,*ic,i,n = A->m;
62   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63   PetscInt       *ordcol,*iwk,*iperm,*jw;
64   PetscInt       jmax,lfill,job,*o_i,*o_j;
65   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66   PetscReal      af;
67 
68   PetscFunctionBegin;
69 
70   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
71   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
72   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
73   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
74   lfill   = (PetscInt)(info->dtcount/2.0);
75   jmax    = (PetscInt)(info->fill*a->nz);
76 
77 
78   /* ------------------------------------------------------------
79      If reorder=.TRUE., then the original matrix has to be
80      reordered to reflect the user selected ordering scheme, and
81      then de-reordered so it is in it's original format.
82      Because Saad's dperm() is NOT in place, we have to copy
83      the original matrix and allocate more storage. . .
84      ------------------------------------------------------------
85   */
86 
87   /* set reorder to true if either isrow or iscol is not identity */
88   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
89   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
90   reorder = PetscNot(reorder);
91 
92 
93   /* storage for ilu factor */
94   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
95   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
97   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
98 
99   /* ------------------------------------------------------------
100      Make sure that everything is Fortran formatted (1-Based)
101      ------------------------------------------------------------
102   */
103   for (i=old_i[0];i<old_i[n];i++) {
104     old_j[i]++;
105   }
106   for(i=0;i<n+1;i++) {
107     old_i[i]++;
108   };
109 
110 
111   if (reorder) {
112     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
113     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
114     for(i=0;i<n;i++) {
115       r[i]  = r[i]+1;
116       c[i]  = c[i]+1;
117     }
118     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
119     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
121     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
122     for (i=0;i<n;i++) {
123       r[i]  = r[i]-1;
124       c[i]  = c[i]-1;
125     }
126     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
127     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
128     o_a = old_a2;
129     o_j = old_j2;
130     o_i = old_i2;
131   } else {
132     o_a = old_a;
133     o_j = old_j;
134     o_i = old_i;
135   }
136 
137   /* ------------------------------------------------------------
138      Call Saad's ilutp() routine to generate the factorization
139      ------------------------------------------------------------
140   */
141 
142   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
144   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
145 
146   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
147   if (sierr) {
148     switch (sierr) {
149       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
150       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
152       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
153       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
154       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
155     }
156   }
157 
158   ierr = PetscFree(w);CHKERRQ(ierr);
159   ierr = PetscFree(jw);CHKERRQ(ierr);
160 
161   /* ------------------------------------------------------------
162      Saad's routine gives the result in Modified Sparse Row (msr)
163      Convert to Compressed Sparse Row format (csr)
164      ------------------------------------------------------------
165   */
166 
167   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
168   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
169 
170   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
171 
172   ierr = PetscFree(iwk);CHKERRQ(ierr);
173   ierr = PetscFree(wk);CHKERRQ(ierr);
174 
175   if (reorder) {
176     ierr = PetscFree(old_a2);CHKERRQ(ierr);
177     ierr = PetscFree(old_j2);CHKERRQ(ierr);
178     ierr = PetscFree(old_i2);CHKERRQ(ierr);
179   } else {
180     /* fix permutation of old_j that the factorization introduced */
181     for (i=old_i[0]; i<old_i[n]; i++) {
182       old_j[i-1] = iperm[old_j[i-1]-1];
183     }
184   }
185 
186   /* get rid of the shift to indices starting at 1 */
187   for (i=0; i<n+1; i++) {
188     old_i[i]--;
189   }
190   for (i=old_i[0];i<old_i[n];i++) {
191     old_j[i]--;
192   }
193 
194   /* Make the factored matrix 0-based */
195   for (i=0; i<n+1; i++) {
196     new_i[i]--;
197   }
198   for (i=new_i[0];i<new_i[n];i++) {
199     new_j[i]--;
200   }
201 
202   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203   /*-- permute the right-hand-side and solution vectors           --*/
204   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
205   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
206   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
207   for(i=0; i<n; i++) {
208     ordcol[i] = ic[iperm[i]-1];
209   };
210   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
211   ierr = ISDestroy(isicol);CHKERRQ(ierr);
212 
213   ierr = PetscFree(iperm);CHKERRQ(ierr);
214 
215   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
216   ierr = PetscFree(ordcol);CHKERRQ(ierr);
217 
218   /*----- put together the new matrix -----*/
219 
220   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
221   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
222   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
223   (*fact)->factor    = FACTOR_LU;
224   (*fact)->assembled = PETSC_TRUE;
225 
226   b = (Mat_SeqAIJ*)(*fact)->data;
227   ierr = PetscFree(b->imax);CHKERRQ(ierr);
228   b->sorted        = PETSC_FALSE;
229   b->singlemalloc  = PETSC_FALSE;
230   /* the next line frees the default space generated by the MatCreate() */
231   ierr             = PetscFree(b->a);CHKERRQ(ierr);
232   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
247 
248   /* check out for identical nodes. If found, use inode functions */
249   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
250 
251   af = ((double)b->nz)/((double)a->nz) + .001;
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 /*
262     Factorization code for AIJ format.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267 {
268   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
269   IS             isicol;
270   PetscErrorCode ierr;
271   PetscInt       *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
272   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
274   PetscReal      f;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
284   ainew[0] = 0;
285   /* don't know how many column pointers are needed so estimate */
286   f = info->fill;
287   jmax  = (PetscInt)(f*ai[n]+1);
288   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
289   /* fill is a linked list of nonzeros in active row */
290   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
291   im   = fill + n + 1;
292   /* idnew is location of diagonal in factor */
293   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
294   idnew[0] = 0;
295 
296   for (i=0; i<n; i++) {
297     /* first copy previous fill into linked list */
298     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
299     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300     ajtmp   = aj + ai[r[i]];
301     fill[n] = n;
302     while (nz--) {
303       fm  = n;
304       idx = ic[*ajtmp++];
305       do {
306         m  = fm;
307         fm = fill[m];
308       } while (fm < idx);
309       fill[m]   = idx;
310       fill[idx] = fm;
311     }
312     row = fill[n];
313     while (row < i) {
314       ajtmp = ajnew + idnew[row] + 1;
315       nzbd  = 1 + idnew[row] - ainew[row];
316       nz    = im[row] - nzbd;
317       fm    = row;
318       while (nz-- > 0) {
319         idx = *ajtmp++ ;
320         nzbd++;
321         if (idx == i) im[row] = nzbd;
322         do {
323           m  = fm;
324           fm = fill[m];
325         } while (fm < idx);
326         if (fm != idx) {
327           fill[m]   = idx;
328           fill[idx] = fm;
329           fm        = idx;
330           nnz++;
331         }
332       }
333       row = fill[row];
334     }
335     /* copy new filled row into permanent storage */
336     ainew[i+1] = ainew[i] + nnz;
337     if (ainew[i+1] > jmax) {
338 
339       /* estimate how much additional space we will need */
340       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
341       /* just double the memory each time */
342       PetscInt maxadd = jmax;
343       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
344       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
345       jmax += maxadd;
346 
347       /* allocate a longer ajnew */
348       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
349       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr);
350       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
351       ajnew = ajtmp;
352       reallocs++; /* count how many times we realloc */
353     }
354     ajtmp = ajnew + ainew[i];
355     fm    = fill[n];
356     nzi   = 0;
357     im[i] = nnz;
358     while (nnz--) {
359       if (fm < i) nzi++;
360       *ajtmp++ = fm ;
361       fm       = fill[fm];
362     }
363     idnew[i] = ainew[i] + nzi;
364   }
365   if (ai[n] != 0) {
366     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
367     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
368     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
369     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
370     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
371   } else {
372     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
373   }
374 
375   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
376   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
377 
378   ierr = PetscFree(fill);CHKERRQ(ierr);
379 
380   /* put together the new matrix */
381   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
382   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
383   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
384   PetscLogObjectParent(*B,isicol);
385   b    = (Mat_SeqAIJ*)(*B)->data;
386   ierr = PetscFree(b->imax);CHKERRQ(ierr);
387   b->singlemalloc = PETSC_FALSE;
388   /* the next line frees the default space generated by the Create() */
389   ierr = PetscFree(b->a);CHKERRQ(ierr);
390   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
391   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
392   b->j          = ajnew;
393   b->i          = ainew;
394   b->diag       = idnew;
395   b->ilen       = 0;
396   b->imax       = 0;
397   b->row        = isrow;
398   b->col        = iscol;
399   b->lu_damping        = info->damping;
400   b->lu_zeropivot      = info->zeropivot;
401   b->lu_shift          = info->shift;
402   b->lu_shift_fraction = info->shift_fraction;
403   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
404   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
405   b->icol       = isicol;
406   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
407   /* In b structure:  Free imax, ilen, old a, old j.
408      Allocate idnew, solve_work, new a, new j */
409   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
410   b->maxnz = b->nz = ainew[n] ;
411 
412   (*B)->factor                 =  FACTOR_LU;
413   (*B)->info.factor_mallocs    = reallocs;
414   (*B)->info.fill_ratio_given  = f;
415   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
416   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
417 
418   if (ai[n] != 0) {
419     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
420   } else {
421     (*B)->info.fill_ratio_needed = 0.0;
422   }
423   PetscFunctionReturn(0);
424 }
425 /* ----------------------------------------------------------- */
426 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
430 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
431 {
432   Mat            C=*B;
433   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
434   IS             isrow = b->row,isicol = b->icol;
435   PetscErrorCode ierr;
436   PetscInt       *r,*ic,i,j,n=A->m,*ai=b->i,*aj=b->j;
437   PetscInt       *ajtmpold,*ajtmp,nz,row,*ics;
438   PetscInt       *diag_offset = b->diag,diag,*pj,nshift=0;
439   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
440   PetscReal      zeropivot=b->lu_zeropivot,rs,d;
441   PetscReal      row_shift,shift_fraction,shift_amount,shift_lo=0.,shift_hi=1.,shift_top=0.;
442   PetscTruth     lushift;
443   PetscReal      shift_pd=b->lu_shift,shift_nonzero=b->lu_damping;
444 
445   PetscFunctionBegin;
446   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
447   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
448   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
449   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
450   rtmps = rtmp; ics = ic;
451 
452   if (!a->diag) {
453     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
454   }
455   /* if both shift schemes are chosen by user, only use shift_pd */
456   if (shift_pd && shift_nonzero) shift_nonzero = 0.0;
457   if (shift_pd) { /* set shift_top=max{row_shift} */
458     PetscInt *aai = a->i,*ddiag = a->diag;
459     shift_top = 0;
460     for (i=0; i<n; i++) {
461       d = PetscAbsScalar((a->a)[ddiag[i]]);
462       /* calculate amt of shift needed for this row */
463       if (d<=0) {
464         row_shift = 0;
465       } else {
466         row_shift = -2*d;
467       }
468       v  = a->a+aai[i];
469       nz = aai[i+1] - aai[i];
470       for (j=0; j<nz; j++)
471 	row_shift += PetscAbsScalar(v[j]);
472       if (row_shift>shift_top) shift_top = row_shift;
473     }
474   }
475 
476   shift_fraction = 0; shift_amount = 0;
477   do {
478     lushift = PETSC_FALSE;
479     for (i=0; i<n; i++){
480       nz    = ai[i+1] - ai[i];
481       ajtmp = aj + ai[i];
482       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
483 
484       /* load in initial (unfactored row) */
485       nz       = a->i[r[i]+1] - a->i[r[i]];
486       ajtmpold = a->j + a->i[r[i]];
487       v        = a->a + a->i[r[i]];
488       for (j=0; j<nz; j++) {
489         rtmp[ics[ajtmpold[j]]] = v[j];
490       }
491       rtmp[ics[r[i]]] += shift_amount; /* shift the diagonal of the matrix */
492 
493       row = *ajtmp++;
494       while  (row < i) {
495         pc = rtmp + row;
496         if (*pc != 0.0) {
497           pv         = b->a + diag_offset[row];
498           pj         = b->j + diag_offset[row] + 1;
499           multiplier = *pc / *pv++;
500           *pc        = multiplier;
501           nz         = ai[row+1] - diag_offset[row] - 1;
502           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
503           PetscLogFlops(2*nz);
504         }
505         row = *ajtmp++;
506       }
507       /* finished row so stick it into b->a */
508       pv   = b->a + ai[i] ;
509       pj   = b->j + ai[i] ;
510       nz   = ai[i+1] - ai[i];
511       diag = diag_offset[i] - ai[i];
512       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
513       rs   = 0.0;
514       for (j=0; j<nz; j++) {
515         pv[j] = rtmps[pj[j]];
516         if (j != diag) rs += PetscAbsScalar(pv[j]);
517       }
518       /* shift the diagonals when zero pivot is detected */
519 #define MAX_NSHIFT 5
520       if (PetscAbsScalar(pv[diag]) <= zeropivot*rs && shift_nonzero){
521         /* force |diag(*B)| > zeropivot*rs */
522         if (!nshift){
523           shift_amount = shift_nonzero;
524         } else {
525           shift_amount *= 2.0;
526         }
527         lushift = PETSC_TRUE;
528         nshift++;
529         break;
530       } else if (PetscRealPart(pv[diag]) <= zeropivot*rs && shift_pd){
531         /* force *B to be diagonally dominant */
532 	if (nshift>MAX_NSHIFT) {
533 	  SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner");
534 	} else if (nshift==MAX_NSHIFT) {
535 	  shift_fraction = shift_hi;
536 	  lushift        = PETSC_FALSE;
537 	} else {
538 	  shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.;
539 	  lushift  = PETSC_TRUE;
540 	}
541 	shift_amount = shift_fraction * shift_top;
542         nshift++;
543         break;
544       } else if (PetscAbsScalar(pv[diag]) <= zeropivot*rs){
545         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
546       }
547     }
548     if (shift_pd && !lushift && shift_fraction>0 && nshift<MAX_NSHIFT) {
549       /*
550        * if no shift in this attempt & shifting & started shifting & can refine,
551        * then try lower shift
552        */
553       shift_hi       = shift_fraction;
554       shift_fraction = (shift_hi+shift_lo)/2.;
555       shift_amount   = shift_fraction * shift_top;
556       lushift        = PETSC_TRUE;
557       nshift++;
558     }
559   } while (lushift);
560 
561   /* invert diagonal entries for simplier triangular solves */
562   for (i=0; i<n; i++) {
563     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
564   }
565 
566   ierr = PetscFree(rtmp);CHKERRQ(ierr);
567   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
568   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
569   C->factor = FACTOR_LU;
570   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
571   C->assembled = PETSC_TRUE;
572   PetscLogFlops(C->n);
573   if (nshift && shift_nonzero) {
574     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D shift_nonzero value %g\n",nshift,shift_amount);
575   }
576   if (nshift && shift_pd) {
577     b->lu_shift_fraction = shift_fraction;
578     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift);
579   }
580   PetscFunctionReturn(0);
581 }
582 
583 #undef __FUNCT__
584 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
585 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
586 {
587   PetscFunctionBegin;
588   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
589   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
590   PetscFunctionReturn(0);
591 }
592 
593 
594 /* ----------------------------------------------------------- */
595 #undef __FUNCT__
596 #define __FUNCT__ "MatLUFactor_SeqAIJ"
597 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
598 {
599   PetscErrorCode ierr;
600   Mat            C;
601 
602   PetscFunctionBegin;
603   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
604   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
605   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
606   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
607   PetscFunctionReturn(0);
608 }
609 /* ----------------------------------------------------------- */
610 #undef __FUNCT__
611 #define __FUNCT__ "MatSolve_SeqAIJ"
612 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
613 {
614   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
615   IS             iscol = a->col,isrow = a->row;
616   PetscErrorCode ierr;
617   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
618   PetscInt       nz,*rout,*cout;
619   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
620 
621   PetscFunctionBegin;
622   if (!n) PetscFunctionReturn(0);
623 
624   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
625   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
626   tmp  = a->solve_work;
627 
628   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
629   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
630 
631   /* forward solve the lower triangular */
632   tmp[0] = b[*r++];
633   tmps   = tmp;
634   for (i=1; i<n; i++) {
635     v   = aa + ai[i] ;
636     vi  = aj + ai[i] ;
637     nz  = a->diag[i] - ai[i];
638     sum = b[*r++];
639     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
640     tmp[i] = sum;
641   }
642 
643   /* backward solve the upper triangular */
644   for (i=n-1; i>=0; i--){
645     v   = aa + a->diag[i] + 1;
646     vi  = aj + a->diag[i] + 1;
647     nz  = ai[i+1] - a->diag[i] - 1;
648     sum = tmp[i];
649     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
650     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
651   }
652 
653   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
654   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
655   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
656   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
657   PetscLogFlops(2*a->nz - A->n);
658   PetscFunctionReturn(0);
659 }
660 
661 /* ----------------------------------------------------------- */
662 #undef __FUNCT__
663 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
664 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
665 {
666   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
667   PetscErrorCode ierr;
668   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
669   PetscScalar    *x,*b,*aa = a->a;
670 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
671   PetscInt       adiag_i,i,*vi,nz,ai_i;
672   PetscScalar    *v,sum;
673 #endif
674 
675   PetscFunctionBegin;
676   if (!n) PetscFunctionReturn(0);
677 
678   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
679   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
680 
681 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
682   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
683 #else
684   /* forward solve the lower triangular */
685   x[0] = b[0];
686   for (i=1; i<n; i++) {
687     ai_i = ai[i];
688     v    = aa + ai_i;
689     vi   = aj + ai_i;
690     nz   = adiag[i] - ai_i;
691     sum  = b[i];
692     while (nz--) sum -= *v++ * x[*vi++];
693     x[i] = sum;
694   }
695 
696   /* backward solve the upper triangular */
697   for (i=n-1; i>=0; i--){
698     adiag_i = adiag[i];
699     v       = aa + adiag_i + 1;
700     vi      = aj + adiag_i + 1;
701     nz      = ai[i+1] - adiag_i - 1;
702     sum     = x[i];
703     while (nz--) sum -= *v++ * x[*vi++];
704     x[i]    = sum*aa[adiag_i];
705   }
706 #endif
707   PetscLogFlops(2*a->nz - A->n);
708   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
709   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
710   PetscFunctionReturn(0);
711 }
712 
713 #undef __FUNCT__
714 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
715 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
716 {
717   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
718   IS             iscol = a->col,isrow = a->row;
719   PetscErrorCode ierr;
720   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
721   PetscInt       nz,*rout,*cout;
722   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
723 
724   PetscFunctionBegin;
725   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
726 
727   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
728   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
729   tmp  = a->solve_work;
730 
731   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
732   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
733 
734   /* forward solve the lower triangular */
735   tmp[0] = b[*r++];
736   for (i=1; i<n; i++) {
737     v   = aa + ai[i] ;
738     vi  = aj + ai[i] ;
739     nz  = a->diag[i] - ai[i];
740     sum = b[*r++];
741     while (nz--) sum -= *v++ * tmp[*vi++ ];
742     tmp[i] = sum;
743   }
744 
745   /* backward solve the upper triangular */
746   for (i=n-1; i>=0; i--){
747     v   = aa + a->diag[i] + 1;
748     vi  = aj + a->diag[i] + 1;
749     nz  = ai[i+1] - a->diag[i] - 1;
750     sum = tmp[i];
751     while (nz--) sum -= *v++ * tmp[*vi++ ];
752     tmp[i] = sum*aa[a->diag[i]];
753     x[*c--] += tmp[i];
754   }
755 
756   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
757   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
758   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
759   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
760   PetscLogFlops(2*a->nz);
761 
762   PetscFunctionReturn(0);
763 }
764 /* -------------------------------------------------------------------*/
765 #undef __FUNCT__
766 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
767 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
768 {
769   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
770   IS             iscol = a->col,isrow = a->row;
771   PetscErrorCode ierr;
772   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
773   PetscInt       nz,*rout,*cout,*diag = a->diag;
774   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
775 
776   PetscFunctionBegin;
777   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
778   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
779   tmp  = a->solve_work;
780 
781   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
782   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
783 
784   /* copy the b into temp work space according to permutation */
785   for (i=0; i<n; i++) tmp[i] = b[c[i]];
786 
787   /* forward solve the U^T */
788   for (i=0; i<n; i++) {
789     v   = aa + diag[i] ;
790     vi  = aj + diag[i] + 1;
791     nz  = ai[i+1] - diag[i] - 1;
792     s1  = tmp[i];
793     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
794     while (nz--) {
795       tmp[*vi++ ] -= (*v++)*s1;
796     }
797     tmp[i] = s1;
798   }
799 
800   /* backward solve the L^T */
801   for (i=n-1; i>=0; i--){
802     v   = aa + diag[i] - 1 ;
803     vi  = aj + diag[i] - 1 ;
804     nz  = diag[i] - ai[i];
805     s1  = tmp[i];
806     while (nz--) {
807       tmp[*vi-- ] -= (*v--)*s1;
808     }
809   }
810 
811   /* copy tmp into x according to permutation */
812   for (i=0; i<n; i++) x[r[i]] = tmp[i];
813 
814   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
815   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
816   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
817   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
818 
819   PetscLogFlops(2*a->nz-A->n);
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
825 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
826 {
827   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
828   IS             iscol = a->col,isrow = a->row;
829   PetscErrorCode ierr;
830   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
831   PetscInt       nz,*rout,*cout,*diag = a->diag;
832   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
833 
834   PetscFunctionBegin;
835   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
836 
837   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
838   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
839   tmp = a->solve_work;
840 
841   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
842   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
843 
844   /* copy the b into temp work space according to permutation */
845   for (i=0; i<n; i++) tmp[i] = b[c[i]];
846 
847   /* forward solve the U^T */
848   for (i=0; i<n; i++) {
849     v   = aa + diag[i] ;
850     vi  = aj + diag[i] + 1;
851     nz  = ai[i+1] - diag[i] - 1;
852     tmp[i] *= *v++;
853     while (nz--) {
854       tmp[*vi++ ] -= (*v++)*tmp[i];
855     }
856   }
857 
858   /* backward solve the L^T */
859   for (i=n-1; i>=0; i--){
860     v   = aa + diag[i] - 1 ;
861     vi  = aj + diag[i] - 1 ;
862     nz  = diag[i] - ai[i];
863     while (nz--) {
864       tmp[*vi-- ] -= (*v--)*tmp[i];
865     }
866   }
867 
868   /* copy tmp into x according to permutation */
869   for (i=0; i<n; i++) x[r[i]] += tmp[i];
870 
871   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
872   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
873   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
874   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
875 
876   PetscLogFlops(2*a->nz);
877   PetscFunctionReturn(0);
878 }
879 /* ----------------------------------------------------------------*/
880 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
881 
882 #undef __FUNCT__
883 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
884 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
885 {
886   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
887   IS             isicol;
888   PetscErrorCode ierr;
889   PetscInt       *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
890   PetscInt       *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
891   PetscInt       *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
892   PetscInt       incrlev,nnz,i,levels,diagonal_fill;
893   PetscTruth     col_identity,row_identity;
894   PetscReal      f;
895 
896   PetscFunctionBegin;
897   f             = info->fill;
898   levels        = (PetscInt)info->levels;
899   diagonal_fill = (PetscInt)info->diagonal_fill;
900   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
901 
902   /* special case that simply copies fill pattern */
903   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
904   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
905   if (!levels && row_identity && col_identity) {
906     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
907     (*fact)->factor = FACTOR_LU;
908     b               = (Mat_SeqAIJ*)(*fact)->data;
909     if (!b->diag) {
910       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
911     }
912     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
913     b->row              = isrow;
914     b->col              = iscol;
915     b->icol             = isicol;
916     b->lu_damping       = info->damping;
917     b->lu_zeropivot     = info->zeropivot;
918     b->lu_shift         = info->shift;
919     b->lu_shift_fraction= info->shift_fraction;
920     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
921     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
922     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
923     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
924     PetscFunctionReturn(0);
925   }
926 
927   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
928   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
929 
930   /* get new row pointers */
931   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
932   ainew[0] = 0;
933   /* don't know how many column pointers are needed so estimate */
934   jmax = (PetscInt)(f*(ai[n]+1));
935   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
936   /* ajfill is level of fill for each fill entry */
937   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr);
938   /* fill is a linked list of nonzeros in active row */
939   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
940   /* im is level for each filled value */
941   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
942   /* dloc is location of diagonal in factor */
943   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr);
944   dloc[0]  = 0;
945   for (prow=0; prow<n; prow++) {
946 
947     /* copy current row into linked list */
948     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
949     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
950     xi      = aj + ai[r[prow]] ;
951     fill[n]    = n;
952     fill[prow] = -1; /* marker to indicate if diagonal exists */
953     while (nz--) {
954       fm  = n;
955       idx = ic[*xi++ ];
956       do {
957         m  = fm;
958         fm = fill[m];
959       } while (fm < idx);
960       fill[m]   = idx;
961       fill[idx] = fm;
962       im[idx]   = 0;
963     }
964 
965     /* make sure diagonal entry is included */
966     if (diagonal_fill && fill[prow] == -1) {
967       fm = n;
968       while (fill[fm] < prow) fm = fill[fm];
969       fill[prow] = fill[fm]; /* insert diagonal into linked list */
970       fill[fm]   = prow;
971       im[prow]   = 0;
972       nzf++;
973       dcount++;
974     }
975 
976     nzi = 0;
977     row = fill[n];
978     while (row < prow) {
979       incrlev = im[row] + 1;
980       nz      = dloc[row];
981       xi      = ajnew  + ainew[row]  + nz + 1;
982       flev    = ajfill + ainew[row]  + nz + 1;
983       nnz     = ainew[row+1] - ainew[row] - nz - 1;
984       fm      = row;
985       while (nnz-- > 0) {
986         idx = *xi++ ;
987         if (*flev + incrlev > levels) {
988           flev++;
989           continue;
990         }
991         do {
992           m  = fm;
993           fm = fill[m];
994         } while (fm < idx);
995         if (fm != idx) {
996           im[idx]   = *flev + incrlev;
997           fill[m]   = idx;
998           fill[idx] = fm;
999           fm        = idx;
1000           nzf++;
1001         } else {
1002           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
1003         }
1004         flev++;
1005       }
1006       row = fill[row];
1007       nzi++;
1008     }
1009     /* copy new filled row into permanent storage */
1010     ainew[prow+1] = ainew[prow] + nzf;
1011     if (ainew[prow+1] > jmax) {
1012 
1013       /* estimate how much additional space we will need */
1014       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1015       /* just double the memory each time */
1016       /*  maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1017       PetscInt maxadd = jmax;
1018       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1019       jmax += maxadd;
1020 
1021       /* allocate a longer ajnew and ajfill */
1022       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1023       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1024       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1025       ajnew  = xi;
1026       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1027       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1028       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1029       ajfill = xi;
1030       reallocs++; /* count how many times we realloc */
1031     }
1032     xi          = ajnew + ainew[prow] ;
1033     flev        = ajfill + ainew[prow] ;
1034     dloc[prow]  = nzi;
1035     fm          = fill[n];
1036     while (nzf--) {
1037       *xi++   = fm ;
1038       *flev++ = im[fm];
1039       fm      = fill[fm];
1040     }
1041     /* make sure row has diagonal entry */
1042     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1043       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1044     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1045     }
1046   }
1047   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1048   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1049   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1050   ierr = PetscFree(fill);CHKERRQ(ierr);
1051   ierr = PetscFree(im);CHKERRQ(ierr);
1052 
1053   {
1054     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1055     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1056     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1057     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1058     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1059     if (diagonal_fill) {
1060       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1061     }
1062   }
1063 
1064   /* put together the new matrix */
1065   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1066   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1067   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1068   PetscLogObjectParent(*fact,isicol);
1069   b = (Mat_SeqAIJ*)(*fact)->data;
1070   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1071   b->singlemalloc = PETSC_FALSE;
1072   len = (ainew[n] )*sizeof(PetscScalar);
1073   /* the next line frees the default space generated by the Create() */
1074   ierr = PetscFree(b->a);CHKERRQ(ierr);
1075   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1076   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1077   b->j          = ajnew;
1078   b->i          = ainew;
1079   for (i=0; i<n; i++) dloc[i] += ainew[i];
1080   b->diag       = dloc;
1081   b->ilen       = 0;
1082   b->imax       = 0;
1083   b->row        = isrow;
1084   b->col        = iscol;
1085   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1086   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1087   b->icol       = isicol;
1088   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1089   /* In b structure:  Free imax, ilen, old a, old j.
1090      Allocate dloc, solve_work, new a, new j */
1091   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1092   b->maxnz             = b->nz = ainew[n] ;
1093   b->lu_damping        = info->damping;
1094   b->lu_shift          = info->shift;
1095   b->lu_shift_fraction = info->shift_fraction;
1096   b->lu_zeropivot = info->zeropivot;
1097   (*fact)->factor = FACTOR_LU;
1098   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1099   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1100 
1101   (*fact)->info.factor_mallocs    = reallocs;
1102   (*fact)->info.fill_ratio_given  = f;
1103   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #include "src/mat/impls/sbaij/seq/sbaij.h"
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1110 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *B)
1111 {
1112   Mat            C = *B;
1113   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1114   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1115   IS             ip=b->row;
1116   PetscErrorCode ierr;
1117   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1118   PetscInt       *ai=a->i,*aj=a->j;
1119   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1120   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1121   PetscReal      damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount;
1122   PetscTruth     damp,chshift;
1123   PetscInt       nshift=0,ndamp=0;
1124 
1125   PetscFunctionBegin;
1126   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1127 
1128   /* initialization */
1129   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1130   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1131   jl   = il + mbs;
1132   rtmp = (MatScalar*)(jl + mbs);
1133 
1134   shift_amount = 0;
1135   do {
1136     /*
1137     damp = PETSC_FALSE;
1138     chshift = PETSC_FALSE;
1139     */
1140     for (i=0; i<mbs; i++) {
1141       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1142     }
1143 
1144     for (k = 0; k<mbs; k++){
1145       bval = ba + bi[k];
1146       /* initialize k-th row by the perm[k]-th row of A */
1147       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1148       for (j = jmin; j < jmax; j++){
1149         col = rip[aj[j]];
1150         if (col >= k){ /* only take upper triangular entry */
1151           rtmp[col] = aa[j];
1152           *bval++  = 0.0; /* for in-place factorization */
1153         }
1154       }
1155       /* damp the diagonal of the matrix */
1156       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
1157 
1158       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1159       dk = rtmp[k];
1160       i = jl[k]; /* first row to be added to k_th row  */
1161 
1162       while (i < k){
1163         nexti = jl[i]; /* next row to be added to k_th row */
1164 
1165         /* compute multiplier, update diag(k) and U(i,k) */
1166         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1167         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1168         dk += uikdi*ba[ili];
1169         ba[ili] = uikdi; /* -U(i,k) */
1170 
1171         /* add multiple of row i to k-th row */
1172         jmin = ili + 1; jmax = bi[i+1];
1173         if (jmin < jmax){
1174           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1175           /* update il and jl for row i */
1176           il[i] = jmin;
1177           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1178         }
1179         i = nexti;
1180       }
1181 
1182       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
1183 	/* calculate a shift that would make this row diagonally dominant */
1184 	PetscReal rs = PetscAbs(PetscRealPart(dk));
1185 	jmin      = bi[k]+1;
1186 	nz        = bi[k+1] - jmin;
1187 	if (nz){
1188 	  bcol = bj + jmin;
1189 	  bval = ba + jmin;
1190 	  while (nz--){
1191 	    rs += PetscAbsScalar(rtmp[*bcol++]);
1192 	  }
1193 	}
1194 	/* if this shift is less than the previous, just up the previous
1195 	   one by a bit */
1196 	shift_amount = PetscMax(rs,1.1*shift_amount);
1197 	chshift  = PETSC_TRUE;
1198 	/* Unlike in the ILU case there is no exit condition on nshift:
1199 	   we increase the shift until it converges. There is no guarantee that
1200 	   this algorithm converges faster or slower, or is better or worse
1201 	   than the ILU algorithm. */
1202 	nshift++;
1203 	break;
1204       }
1205       if (PetscRealPart(dk) < zeropivot){
1206         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
1207         if (damping > 0.0) {
1208           if (ndamp) damping *= 2.0;
1209           damp = PETSC_TRUE;
1210           ndamp++;
1211           break;
1212         } else if (PetscAbsScalar(dk) < zeropivot){
1213           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
1214         } else {
1215           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
1216         }
1217       }
1218 
1219       /* copy data into U(k,:) */
1220       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1221       jmin = bi[k]+1; jmax = bi[k+1];
1222       if (jmin < jmax) {
1223         for (j=jmin; j<jmax; j++){
1224           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1225         }
1226         /* add the k-th row into il and jl */
1227         il[k] = jmin;
1228         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1229       }
1230     }
1231   } while (damp||chshift);
1232   ierr = PetscFree(il);CHKERRQ(ierr);
1233 
1234   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1235   C->factor       = FACTOR_CHOLESKY;
1236   C->assembled    = PETSC_TRUE;
1237   C->preallocated = PETSC_TRUE;
1238   PetscLogFlops(C->m);
1239   if (ndamp) {
1240     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping);
1241   }
1242   if (nshift) {
1243     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ diagonal shifted %D shifts\n",nshift);
1244   }
1245   PetscFunctionReturn(0);
1246 }
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering"
1249 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering(Mat A,Mat *fact)
1250 {
1251   Mat            C = *fact;
1252   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1253   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1254   PetscErrorCode ierr;
1255   PetscInt       i,j,am = A->m;
1256   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1257   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
1258   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1259   PetscReal      damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
1260   PetscTruth     damp,chshift;
1261   PetscInt       nshift=0;
1262 
1263   PetscFunctionBegin;
1264   /* initialization */
1265   nz   = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar);
1266   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1267   jl   = il + am;
1268   rtmp = (MatScalar*)(jl + am);
1269 
1270   shift_amount = 0;
1271   do {
1272     damp = PETSC_FALSE;
1273     chshift = PETSC_FALSE;
1274     for (i=0; i<am; i++) {
1275       rtmp[i] = 0.0; jl[i] = am; il[0] = 0;
1276     }
1277 
1278     for (k = 0; k<am; k++){
1279     /* initialize k-th row with elements nonzero in row perm(k) of A */
1280       nz   = ai[k+1] - ai[k];
1281       acol = aj + ai[k];
1282       aval = aa + ai[k];
1283       bval = ba + bi[k];
1284       while (nz -- ){
1285         if (*acol < k) { /* skip lower triangular entries */
1286           acol++; aval++;
1287         } else {
1288           rtmp[*acol++] = *aval++;
1289           *bval++       = 0.0; /* for in-place factorization */
1290         }
1291       }
1292       /* damp the diagonal of the matrix */
1293       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
1294 
1295       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1296       dk = rtmp[k];
1297       i  = jl[k]; /* first row to be added to k_th row  */
1298 
1299       while (i < k){
1300         nexti = jl[i]; /* next row to be added to k_th row */
1301         /* compute multiplier, update D(k) and U(i,k) */
1302         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1303         uikdi = - ba[ili]*ba[bi[i]];
1304         dk   += uikdi*ba[ili];
1305         ba[ili] = uikdi; /* -U(i,k) */
1306 
1307         /* add multiple of row i to k-th row ... */
1308         jmin = ili + 1;
1309         nz   = bi[i+1] - jmin;
1310         if (nz > 0){
1311           bcol = bj + jmin;
1312           bval = ba + jmin;
1313           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1314           /* update il and jl for i-th row */
1315           il[i] = jmin;
1316           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1317         }
1318         i = nexti;
1319       }
1320 
1321       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
1322 	/* calculate a shift that would make this row diagonally dominant */
1323 	PetscReal rs = PetscAbs(PetscRealPart(dk));
1324 	jmin      = bi[k]+1;
1325 	nz        = bi[k+1] - jmin;
1326 	if (nz){
1327 	  bcol = bj + jmin;
1328 	  bval = ba + jmin;
1329 	  while (nz--){
1330 	    rs += PetscAbsScalar(rtmp[*bcol++]);
1331 	  }
1332 	}
1333 	/* if this shift is less than the previous, just up the previous
1334 	   one by a bit */
1335 	shift_amount = PetscMax(rs,1.1*shift_amount);
1336 	chshift  = PETSC_TRUE;
1337 	/* Unlike in the ILU case there is no exit condition on nshift:
1338 	   we increase the shift until it converges. There is no guarantee that
1339 	   this algorithm converges faster or slower, or is better or worse
1340 	   than the ILU algorithm. */
1341 	nshift++;
1342 	break;
1343       }
1344       if (PetscRealPart(dk) < zeropivot){
1345         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
1346         if (damping > 0.0) {
1347           if (ndamp) damping *= 2.0;
1348           damp = PETSC_TRUE;
1349           ndamp++;
1350           break;
1351         } else if (PetscAbsScalar(dk) < zeropivot){
1352           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
1353         } else {
1354           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
1355         }
1356       }
1357 
1358       /* copy data into U(k,:) */
1359       ba[bi[k]] = 1.0/dk;
1360       jmin      = bi[k]+1;
1361       nz        = bi[k+1] - jmin;
1362       if (nz){
1363         bcol = bj + jmin;
1364         bval = ba + jmin;
1365         while (nz--){
1366           *bval++       = rtmp[*bcol];
1367           rtmp[*bcol++] = 0.0;
1368         }
1369         /* add k-th row into il and jl */
1370         il[k] = jmin;
1371         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1372       }
1373     }
1374   } while (damp||chshift);
1375   ierr = PetscFree(il);CHKERRQ(ierr);
1376 
1377   C->factor       = FACTOR_CHOLESKY;
1378   C->assembled    = PETSC_TRUE;
1379   C->preallocated = PETSC_TRUE;
1380   PetscLogFlops(C->m);
1381   if (ndamp) {
1382     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqAIJ_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping);
1383   }
1384   if (nshift) {
1385     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering diagonal shifted %D shifts\n",nshift);
1386   }
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 #include "petscbt.h"
1391 #include "src/mat/utils/freespace.h"
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1394 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1395 {
1396   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1397   Mat_SeqSBAIJ   *b;
1398   Mat            B;
1399   PetscErrorCode ierr;
1400   PetscTruth     perm_identity;
1401   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1402   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1403   PetscInt       nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1404   PetscReal      fill=info->fill,levels=info->levels;
1405   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1406   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1407   PetscBT        lnkbt;
1408 
1409   PetscFunctionBegin;
1410   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1411   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1412 
1413   /* special case that simply copies fill pattern */
1414   if (!levels && perm_identity) {
1415     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1416     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1417     for (i=0; i<am; i++) {
1418       ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */
1419     }
1420     ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1421     B = *fact;
1422     ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1423     ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr);
1424 
1425     b  = (Mat_SeqSBAIJ*)B->data;
1426     uj = b->j;
1427     for (i=0; i<am; i++) {
1428       aj = a->j + a->diag[i];
1429       for (j=0; j<ui[i]; j++){
1430         *uj++ = *aj++;
1431       }
1432       b->ilen[i] = ui[i];
1433     }
1434     ierr = PetscFree(ui);CHKERRQ(ierr);
1435     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1436     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1437 
1438     B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1439     B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1440     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
1441     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering;
1442     PetscFunctionReturn(0);
1443   }
1444 
1445   /* initialization */
1446   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1447   ui[0] = 0;
1448   ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
1449 
1450   /* jl: linked list for storing indices of the pivot rows
1451      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1452   ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1453   il         = jl + am;
1454   uj_ptr     = (PetscInt**)(il + am);
1455   uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1456   for (i=0; i<am; i++){
1457     jl[i] = am; il[i] = 0;
1458   }
1459 
1460   /* create and initialize a linked list for storing column indices of the active row k */
1461   nlnk = am + 1;
1462   ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1463 
1464   /* initial FreeSpace size is fill*(ai[am]+1) */
1465   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1466   current_space = free_space;
1467   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1468   current_space_lvl = free_space_lvl;
1469 
1470   for (k=0; k<am; k++){  /* for each active row k */
1471     /* initialize lnk by the column indices of row rip[k] of A */
1472     nzk   = 0;
1473     ncols = ai[rip[k]+1] - ai[rip[k]];
1474     ncols_upper = 0;
1475     cols        = cols_lvl + am;
1476     for (j=0; j<ncols; j++){
1477       i = rip[*(aj + ai[rip[k]] + j)];
1478       if (i >= k){ /* only take upper triangular entry */
1479         cols[ncols_upper] = i;
1480         cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
1481         ncols_upper++;
1482       }
1483     }
1484     ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1485     nzk += nlnk;
1486 
1487     /* update lnk by computing fill-in for each pivot row to be merged in */
1488     prow = jl[k]; /* 1st pivot row */
1489 
1490     while (prow < k){
1491       nextprow = jl[prow];
1492 
1493       /* merge prow into k-th row */
1494       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1495       jmax = ui[prow+1];
1496       ncols = jmax-jmin;
1497       i     = jmin - ui[prow];
1498       cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1499       for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1500       ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1501       nzk += nlnk;
1502 
1503       /* update il and jl for prow */
1504       if (jmin < jmax){
1505         il[prow] = jmin;
1506         j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1507       }
1508       prow = nextprow;
1509     }
1510 
1511     /* if free space is not available, make more free space */
1512     if (current_space->local_remaining<nzk) {
1513       i = am - k + 1; /* num of unfactored rows */
1514       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1515       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1516       ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1517       reallocs++;
1518     }
1519 
1520     /* copy data into free_space and free_space_lvl, then initialize lnk */
1521     ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1522 
1523     /* add the k-th row into il and jl */
1524     if (nzk-1 > 0){
1525       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1526       jl[k] = jl[i]; jl[i] = k;
1527       il[k] = ui[k] + 1;
1528     }
1529     uj_ptr[k]     = current_space->array;
1530     uj_lvl_ptr[k] = current_space_lvl->array;
1531 
1532     current_space->array           += nzk;
1533     current_space->local_used      += nzk;
1534     current_space->local_remaining -= nzk;
1535 
1536     current_space_lvl->array           += nzk;
1537     current_space_lvl->local_used      += nzk;
1538     current_space_lvl->local_remaining -= nzk;
1539 
1540     ui[k+1] = ui[k] + nzk;
1541   }
1542 
1543   if (ai[am] != 0) {
1544     PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]);
1545     PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1546     PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1547     PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1548   } else {
1549      PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1550   }
1551 
1552   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1553   ierr = PetscFree(jl);CHKERRQ(ierr);
1554   ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
1555 
1556   /* destroy list of free space and other temporary array(s) */
1557   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1558   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1559   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1560   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1561 
1562   /* put together the new matrix in MATSEQSBAIJ format */
1563   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1564   B = *fact;
1565   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1566   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1567 
1568   b = (Mat_SeqSBAIJ*)B->data;
1569   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1570   b->singlemalloc = PETSC_FALSE;
1571   /* the next line frees the default space generated by the Create() */
1572   ierr = PetscFree(b->a);CHKERRQ(ierr);
1573   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1574   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1575   b->j    = uj;
1576   b->i    = ui;
1577   b->diag = 0;
1578   b->ilen = 0;
1579   b->imax = 0;
1580   b->row  = perm;
1581   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1582   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1583   b->icol = perm;
1584   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1585   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1586   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1587   b->maxnz = b->nz = ui[am];
1588 
1589   B->factor                 = FACTOR_CHOLESKY;
1590   B->info.factor_mallocs    = reallocs;
1591   B->info.fill_ratio_given  = fill;
1592   if (ai[am] != 0) {
1593     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1594   } else {
1595     B->info.fill_ratio_needed = 0.0;
1596   }
1597   if (perm_identity){
1598     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1599     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1600     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
1601     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering;
1602   } else {
1603     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1604   }
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 #undef __FUNCT__
1609 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1610 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1611 {
1612   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1613   Mat_SeqSBAIJ   *b;
1614   Mat            B;
1615   PetscErrorCode ierr;
1616   PetscTruth     perm_identity;
1617   PetscReal      fill = info->fill;
1618   PetscInt       *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1619   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1620   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1621   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1622   PetscBT        lnkbt;
1623   IS             iperm;
1624 
1625   PetscFunctionBegin;
1626   /* check whether perm is the identity mapping */
1627   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1628   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1629 
1630   if (!perm_identity){
1631     /* check if perm is symmetric! */
1632     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1633     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1634     for (i=0; i<mbs; i++) {
1635       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1636     }
1637     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1638     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1639   }
1640 
1641   /* initialization */
1642   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1643   ui[0] = 0;
1644 
1645   /* jl: linked list for storing indices of the pivot rows
1646      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
1647   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1648   il     = jl + mbs;
1649   cols   = il + mbs;
1650   ui_ptr = (PetscInt**)(cols + mbs);
1651   for (i=0; i<mbs; i++){
1652     jl[i] = mbs; il[i] = 0;
1653   }
1654 
1655   /* create and initialize a linked list for storing column indices of the active row k */
1656   nlnk = mbs + 1;
1657   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1658 
1659   /* initial FreeSpace size is fill*(ai[mbs]+1) */
1660   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
1661   current_space = free_space;
1662 
1663   for (k=0; k<mbs; k++){  /* for each active row k */
1664     /* initialize lnk by the column indices of row rip[k] of A */
1665     nzk   = 0;
1666     ncols = ai[rip[k]+1] - ai[rip[k]];
1667     ncols_upper = 0;
1668     for (j=0; j<ncols; j++){
1669       i = rip[*(aj + ai[rip[k]] + j)];
1670       if (i >= k){ /* only take upper triangular entry */
1671         cols[ncols_upper] = i;
1672         ncols_upper++;
1673       }
1674     }
1675     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1676     nzk += nlnk;
1677 
1678     /* update lnk by computing fill-in for each pivot row to be merged in */
1679     prow = jl[k]; /* 1st pivot row */
1680 
1681     while (prow < k){
1682       nextprow = jl[prow];
1683       /* merge prow into k-th row */
1684       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
1685       jmax = ui[prow+1];
1686       ncols = jmax-jmin;
1687       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
1688       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1689       nzk += nlnk;
1690 
1691       /* update il and jl for prow */
1692       if (jmin < jmax){
1693         il[prow] = jmin;
1694         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1695       }
1696       prow = nextprow;
1697     }
1698 
1699     /* if free space is not available, make more free space */
1700     if (current_space->local_remaining<nzk) {
1701       i = mbs - k + 1; /* num of unfactored rows */
1702       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1703       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1704       reallocs++;
1705     }
1706 
1707     /* copy data into free space, then initialize lnk */
1708     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1709 
1710     /* add the k-th row into il and jl */
1711     if (nzk-1 > 0){
1712       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1713       jl[k] = jl[i]; jl[i] = k;
1714       il[k] = ui[k] + 1;
1715     }
1716     ui_ptr[k] = current_space->array;
1717     current_space->array           += nzk;
1718     current_space->local_used      += nzk;
1719     current_space->local_remaining -= nzk;
1720 
1721     ui[k+1] = ui[k] + nzk;
1722   }
1723 
1724   if (ai[mbs] != 0) {
1725     PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]);
1726     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1727     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1728     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1729   } else {
1730      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1731   }
1732 
1733   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1734   ierr = PetscFree(jl);CHKERRQ(ierr);
1735 
1736   /* destroy list of free space and other temporary array(s) */
1737   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1738   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1739   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1740 
1741   /* put together the new matrix in MATSEQSBAIJ format */
1742   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
1743   B    = *fact;
1744   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1745   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1746 
1747   b = (Mat_SeqSBAIJ*)B->data;
1748   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1749   b->singlemalloc = PETSC_FALSE;
1750   /* the next line frees the default space generated by the Create() */
1751   ierr = PetscFree(b->a);CHKERRQ(ierr);
1752   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1753   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1754   b->j    = uj;
1755   b->i    = ui;
1756   b->diag = 0;
1757   b->ilen = 0;
1758   b->imax = 0;
1759   b->row  = perm;
1760   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1761   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1762   b->icol = perm;
1763   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1764   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1765   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
1766   b->maxnz = b->nz = ui[mbs];
1767 
1768   B->factor                 = FACTOR_CHOLESKY;
1769   B->info.factor_mallocs    = reallocs;
1770   B->info.fill_ratio_given  = fill;
1771   if (ai[mbs] != 0) {
1772     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
1773   } else {
1774     B->info.fill_ratio_needed = 0.0;
1775   }
1776   if (perm_identity){
1777     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1778     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1779     ierr = PetscObjectComposeFunction((PetscObject)(*fact),"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr);
1780     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering;
1781   } else {
1782     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1783   }
1784   PetscFunctionReturn(0);
1785 }
1786