xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 4bcd6097eb738ae87d3212ea8c9aa03077eaf145)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9 {
10   PetscFunctionBegin;
11 
12   SETERRQ(PETSC_ERR_SUP,"Code not written");
13 #if !defined(PETSC_USE_DEBUG)
14   PetscFunctionReturn(0);
15 #endif
16 }
17 
18 
19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
21 
22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
28   /* ------------------------------------------------------------
29 
30           This interface was contribed by Tony Caola
31 
32      This routine is an interface to the pivoting drop-tolerance
33      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34      SPARSEKIT2.
35 
36      The SPARSEKIT2 routines used here are covered by the GNU
37      copyright; see the file gnu in this directory.
38 
39      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40      help in getting this routine ironed out.
41 
42      The major drawback to this routine is that if info->fill is
43      not large enough it fails rather than allocating more space;
44      this can be fixed by hacking/improving the f2c version of
45      Yousef Saad's code.
46 
47      ------------------------------------------------------------
48 */
49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50 {
51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
52   PetscFunctionBegin;
53   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
54   You can obtain the drop tolerance routines by installing PETSc from\n\
55   www.mcs.anl.gov/petsc\n");
56 #else
57   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
58   IS             iscolf,isicol,isirow;
59   PetscTruth     reorder;
60   PetscErrorCode ierr,sierr;
61   PetscInt       *c,*r,*ic,i,n = A->m;
62   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63   PetscInt       *ordcol,*iwk,*iperm,*jw;
64   PetscInt       jmax,lfill,job,*o_i,*o_j;
65   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66   PetscReal      af;
67 
68   PetscFunctionBegin;
69 
70   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
71   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
72   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
73   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
74   lfill   = (PetscInt)(info->dtcount/2.0);
75   jmax    = (PetscInt)(info->fill*a->nz);
76 
77 
78   /* ------------------------------------------------------------
79      If reorder=.TRUE., then the original matrix has to be
80      reordered to reflect the user selected ordering scheme, and
81      then de-reordered so it is in it's original format.
82      Because Saad's dperm() is NOT in place, we have to copy
83      the original matrix and allocate more storage. . .
84      ------------------------------------------------------------
85   */
86 
87   /* set reorder to true if either isrow or iscol is not identity */
88   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
89   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
90   reorder = PetscNot(reorder);
91 
92 
93   /* storage for ilu factor */
94   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
95   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
97   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
98 
99   /* ------------------------------------------------------------
100      Make sure that everything is Fortran formatted (1-Based)
101      ------------------------------------------------------------
102   */
103   for (i=old_i[0];i<old_i[n];i++) {
104     old_j[i]++;
105   }
106   for(i=0;i<n+1;i++) {
107     old_i[i]++;
108   };
109 
110 
111   if (reorder) {
112     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
113     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
114     for(i=0;i<n;i++) {
115       r[i]  = r[i]+1;
116       c[i]  = c[i]+1;
117     }
118     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
119     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
121     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
122     for (i=0;i<n;i++) {
123       r[i]  = r[i]-1;
124       c[i]  = c[i]-1;
125     }
126     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
127     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
128     o_a = old_a2;
129     o_j = old_j2;
130     o_i = old_i2;
131   } else {
132     o_a = old_a;
133     o_j = old_j;
134     o_i = old_i;
135   }
136 
137   /* ------------------------------------------------------------
138      Call Saad's ilutp() routine to generate the factorization
139      ------------------------------------------------------------
140   */
141 
142   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
144   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
145 
146   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
147   if (sierr) {
148     switch (sierr) {
149       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
150       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
152       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
153       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
154       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
155     }
156   }
157 
158   ierr = PetscFree(w);CHKERRQ(ierr);
159   ierr = PetscFree(jw);CHKERRQ(ierr);
160 
161   /* ------------------------------------------------------------
162      Saad's routine gives the result in Modified Sparse Row (msr)
163      Convert to Compressed Sparse Row format (csr)
164      ------------------------------------------------------------
165   */
166 
167   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
168   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
169 
170   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
171 
172   ierr = PetscFree(iwk);CHKERRQ(ierr);
173   ierr = PetscFree(wk);CHKERRQ(ierr);
174 
175   if (reorder) {
176     ierr = PetscFree(old_a2);CHKERRQ(ierr);
177     ierr = PetscFree(old_j2);CHKERRQ(ierr);
178     ierr = PetscFree(old_i2);CHKERRQ(ierr);
179   } else {
180     /* fix permutation of old_j that the factorization introduced */
181     for (i=old_i[0]; i<old_i[n]; i++) {
182       old_j[i-1] = iperm[old_j[i-1]-1];
183     }
184   }
185 
186   /* get rid of the shift to indices starting at 1 */
187   for (i=0; i<n+1; i++) {
188     old_i[i]--;
189   }
190   for (i=old_i[0];i<old_i[n];i++) {
191     old_j[i]--;
192   }
193 
194   /* Make the factored matrix 0-based */
195   for (i=0; i<n+1; i++) {
196     new_i[i]--;
197   }
198   for (i=new_i[0];i<new_i[n];i++) {
199     new_j[i]--;
200   }
201 
202   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203   /*-- permute the right-hand-side and solution vectors           --*/
204   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
205   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
206   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
207   for(i=0; i<n; i++) {
208     ordcol[i] = ic[iperm[i]-1];
209   };
210   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
211   ierr = ISDestroy(isicol);CHKERRQ(ierr);
212 
213   ierr = PetscFree(iperm);CHKERRQ(ierr);
214 
215   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
216   ierr = PetscFree(ordcol);CHKERRQ(ierr);
217 
218   /*----- put together the new matrix -----*/
219 
220   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
221   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
222   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
223   (*fact)->factor    = FACTOR_LU;
224   (*fact)->assembled = PETSC_TRUE;
225 
226   b = (Mat_SeqAIJ*)(*fact)->data;
227   ierr = PetscFree(b->imax);CHKERRQ(ierr);
228   b->sorted        = PETSC_FALSE;
229   b->singlemalloc  = PETSC_FALSE;
230   /* the next line frees the default space generated by the MatCreate() */
231   ierr             = PetscFree(b->a);CHKERRQ(ierr);
232   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
247 
248   /* check out for identical nodes. If found, use inode functions */
249   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
250 
251   af = ((double)b->nz)/((double)a->nz) + .001;
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 /*
262     Factorization code for AIJ format.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267 {
268   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
269   IS             isicol;
270   PetscErrorCode ierr;
271   PetscInt       *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
272   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
274   PetscReal      f;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
284   ainew[0] = 0;
285   /* don't know how many column pointers are needed so estimate */
286   f = info->fill;
287   jmax  = (PetscInt)(f*ai[n]+1);
288   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
289   /* fill is a linked list of nonzeros in active row */
290   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
291   im   = fill + n + 1;
292   /* idnew is location of diagonal in factor */
293   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
294   idnew[0] = 0;
295 
296   for (i=0; i<n; i++) {
297     /* first copy previous fill into linked list */
298     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
299     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300     ajtmp   = aj + ai[r[i]];
301     fill[n] = n;
302     while (nz--) {
303       fm  = n;
304       idx = ic[*ajtmp++];
305       do {
306         m  = fm;
307         fm = fill[m];
308       } while (fm < idx);
309       fill[m]   = idx;
310       fill[idx] = fm;
311     }
312     row = fill[n];
313     while (row < i) {
314       ajtmp = ajnew + idnew[row] + 1;
315       nzbd  = 1 + idnew[row] - ainew[row];
316       nz    = im[row] - nzbd;
317       fm    = row;
318       while (nz-- > 0) {
319         idx = *ajtmp++ ;
320         nzbd++;
321         if (idx == i) im[row] = nzbd;
322         do {
323           m  = fm;
324           fm = fill[m];
325         } while (fm < idx);
326         if (fm != idx) {
327           fill[m]   = idx;
328           fill[idx] = fm;
329           fm        = idx;
330           nnz++;
331         }
332       }
333       row = fill[row];
334     }
335     /* copy new filled row into permanent storage */
336     ainew[i+1] = ainew[i] + nnz;
337     if (ainew[i+1] > jmax) {
338 
339       /* estimate how much additional space we will need */
340       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
341       /* just double the memory each time */
342       PetscInt maxadd = jmax;
343       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
344       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
345       jmax += maxadd;
346 
347       /* allocate a longer ajnew */
348       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
349       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr);
350       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
351       ajnew = ajtmp;
352       reallocs++; /* count how many times we realloc */
353     }
354     ajtmp = ajnew + ainew[i];
355     fm    = fill[n];
356     nzi   = 0;
357     im[i] = nnz;
358     while (nnz--) {
359       if (fm < i) nzi++;
360       *ajtmp++ = fm ;
361       fm       = fill[fm];
362     }
363     idnew[i] = ainew[i] + nzi;
364   }
365   if (ai[n] != 0) {
366     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
367     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
368     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
369     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
370     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
371   } else {
372     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
373   }
374 
375   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
376   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
377 
378   ierr = PetscFree(fill);CHKERRQ(ierr);
379 
380   /* put together the new matrix */
381   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
382   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
383   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
384   PetscLogObjectParent(*B,isicol);
385   b    = (Mat_SeqAIJ*)(*B)->data;
386   ierr = PetscFree(b->imax);CHKERRQ(ierr);
387   b->singlemalloc = PETSC_FALSE;
388   /* the next line frees the default space generated by the Create() */
389   ierr = PetscFree(b->a);CHKERRQ(ierr);
390   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
391   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
392   b->j          = ajnew;
393   b->i          = ainew;
394   b->diag       = idnew;
395   b->ilen       = 0;
396   b->imax       = 0;
397   b->row        = isrow;
398   b->col        = iscol;
399   /*
400   b->lu_damping        = info->damping;
401   b->lu_zeropivot      = info->zeropivot;
402   b->lu_shift          = info->shift;
403   b->lu_shift_fraction = info->shift_fraction;
404   */
405   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
406   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
407   b->icol       = isicol;
408   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
409   /* In b structure:  Free imax, ilen, old a, old j.
410      Allocate idnew, solve_work, new a, new j */
411   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
412   b->maxnz = b->nz = ainew[n] ;
413 
414   (*B)->factor                 =  FACTOR_LU;
415   (*B)->info.factor_mallocs    = reallocs;
416   (*B)->info.fill_ratio_given  = f;
417   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
418   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
419 
420   if (ai[n] != 0) {
421     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
422   } else {
423     (*B)->info.fill_ratio_needed = 0.0;
424   }
425   PetscFunctionReturn(0);
426 }
427 /* ----------------------------------------------------------- */
428 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
429 
430 #undef __FUNCT__
431 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
432 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
433 {
434   Mat            C=*B;
435   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
436   IS             isrow = b->row,isicol = b->icol;
437   PetscErrorCode ierr;
438   PetscInt       *r,*ic,i,j,n=A->m,*ai=b->i,*aj=b->j;
439   PetscInt       *ajtmpold,*ajtmp,nz,row,*ics;
440   PetscInt       *diag_offset = b->diag,diag,*pj,nshift=0;
441   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
442   PetscReal      zeropivot=b->lu_zeropivot,rs,d;
443   PetscReal      row_shift,shift_fraction,shift_amount,shift_lo=0.,shift_hi=1.,shift_top=0.;
444   PetscTruth     lushift;
445   PetscReal      shift_nonzero; /* shift_pd=b->lu_shift, =b->lu_damping; */
446   PetscTruth     shift_pd;
447 
448   PetscFunctionBegin;
449 
450   shift_nonzero  = info->shiftnz;
451   shift_pd       = info->shiftpd;
452   zeropivot      = info->zeropivot;
453   shift_fraction = info->shift_fraction;
454 
455   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
456   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
457   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
458   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
459   rtmps = rtmp; ics = ic;
460 
461   if (!a->diag) {
462     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
463   }
464   /* if both shift schemes are chosen by user, only use shift_pd */
465   if (shift_pd && shift_nonzero) shift_nonzero = 0.0;
466   if (shift_pd) { /* set shift_top=max{row_shift} */
467     PetscInt *aai = a->i,*ddiag = a->diag;
468     shift_top = 0;
469     for (i=0; i<n; i++) {
470       d = PetscAbsScalar((a->a)[ddiag[i]]);
471       /* calculate amt of shift needed for this row */
472       if (d<=0) {
473         row_shift = 0;
474       } else {
475         row_shift = -2*d;
476       }
477       v  = a->a+aai[i];
478       nz = aai[i+1] - aai[i];
479       for (j=0; j<nz; j++)
480 	row_shift += PetscAbsScalar(v[j]);
481       if (row_shift>shift_top) shift_top = row_shift;
482     }
483   }
484 
485   /* shift_fraction = b->lu_shift_fraction; */
486   shift_amount   = 0;
487   do {
488     lushift = PETSC_FALSE;
489     for (i=0; i<n; i++){
490       nz    = ai[i+1] - ai[i];
491       ajtmp = aj + ai[i];
492       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
493 
494       /* load in initial (unfactored row) */
495       nz       = a->i[r[i]+1] - a->i[r[i]];
496       ajtmpold = a->j + a->i[r[i]];
497       v        = a->a + a->i[r[i]];
498       for (j=0; j<nz; j++) {
499         rtmp[ics[ajtmpold[j]]] = v[j];
500       }
501       rtmp[ics[r[i]]] += shift_amount; /* shift the diagonal of the matrix */
502 
503       row = *ajtmp++;
504       while  (row < i) {
505         pc = rtmp + row;
506         if (*pc != 0.0) {
507           pv         = b->a + diag_offset[row];
508           pj         = b->j + diag_offset[row] + 1;
509           multiplier = *pc / *pv++;
510           *pc        = multiplier;
511           nz         = ai[row+1] - diag_offset[row] - 1;
512           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
513           PetscLogFlops(2*nz);
514         }
515         row = *ajtmp++;
516       }
517       /* finished row so stick it into b->a */
518       pv   = b->a + ai[i] ;
519       pj   = b->j + ai[i] ;
520       nz   = ai[i+1] - ai[i];
521       diag = diag_offset[i] - ai[i];
522       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
523       rs   = 0.0;
524       for (j=0; j<nz; j++) {
525         pv[j] = rtmps[pj[j]];
526         if (j != diag) rs += PetscAbsScalar(pv[j]);
527       }
528       /* shift the diagonals when zero pivot is detected */
529 #define MAX_NSHIFT 5
530       if (PetscAbsScalar(pv[diag]) <= zeropivot*rs && shift_nonzero){
531         /* force |diag(*B)| > zeropivot*rs */
532         if (!nshift){
533           shift_amount = shift_nonzero;
534         } else {
535           shift_amount *= 2.0;
536         }
537         lushift = PETSC_TRUE;
538         nshift++;
539         break;
540       } else if (PetscRealPart(pv[diag]) <= zeropivot*rs && shift_pd){
541         /* force *B to be diagonally dominant */
542 	if (nshift>MAX_NSHIFT) {
543 	  SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner");
544 	} else if (nshift==MAX_NSHIFT) {
545 	  shift_fraction = shift_hi;
546 	  lushift        = PETSC_FALSE;
547 	} else {
548 	  shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.;
549 	  lushift  = PETSC_TRUE;
550 	}
551 	shift_amount = shift_fraction * shift_top;
552         nshift++;
553         break;
554       } else if (PetscAbsScalar(pv[diag]) <= zeropivot*rs){
555         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
556       }
557     }
558     if (shift_pd && !lushift && shift_fraction>0 && nshift<MAX_NSHIFT) {
559       /*
560        * if no shift in this attempt & shifting & started shifting & can refine,
561        * then try lower shift
562        */
563       shift_hi       = shift_fraction;
564       shift_fraction = (shift_hi+shift_lo)/2.;
565       shift_amount   = shift_fraction * shift_top;
566       lushift        = PETSC_TRUE;
567       nshift++;
568     }
569   } while (lushift);
570 
571   /* invert diagonal entries for simplier triangular solves */
572   for (i=0; i<n; i++) {
573     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
574   }
575 
576   ierr = PetscFree(rtmp);CHKERRQ(ierr);
577   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
578   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
579   C->factor = FACTOR_LU;
580   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
581   C->assembled = PETSC_TRUE;
582   PetscLogFlops(C->n);
583   if (nshift){
584     if (shift_nonzero) {
585       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D, shift_ amount %g\n",nshift,shift_amount);
586     } else if (shift_pd) {
587       b->lu_shift_fraction = shift_fraction;
588       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift);
589     }
590   }
591   PetscFunctionReturn(0);
592 }
593 
594 #undef __FUNCT__
595 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
596 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
597 {
598   PetscFunctionBegin;
599   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
600   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
601   PetscFunctionReturn(0);
602 }
603 
604 
605 /* ----------------------------------------------------------- */
606 #undef __FUNCT__
607 #define __FUNCT__ "MatLUFactor_SeqAIJ"
608 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
609 {
610   PetscErrorCode ierr;
611   Mat            C;
612 
613   PetscFunctionBegin;
614   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
615   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
616   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
617   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
618   PetscFunctionReturn(0);
619 }
620 /* ----------------------------------------------------------- */
621 #undef __FUNCT__
622 #define __FUNCT__ "MatSolve_SeqAIJ"
623 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
624 {
625   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
626   IS             iscol = a->col,isrow = a->row;
627   PetscErrorCode ierr;
628   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
629   PetscInt       nz,*rout,*cout;
630   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
631 
632   PetscFunctionBegin;
633   if (!n) PetscFunctionReturn(0);
634 
635   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
636   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
637   tmp  = a->solve_work;
638 
639   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
640   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
641 
642   /* forward solve the lower triangular */
643   tmp[0] = b[*r++];
644   tmps   = tmp;
645   for (i=1; i<n; i++) {
646     v   = aa + ai[i] ;
647     vi  = aj + ai[i] ;
648     nz  = a->diag[i] - ai[i];
649     sum = b[*r++];
650     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
651     tmp[i] = sum;
652   }
653 
654   /* backward solve the upper triangular */
655   for (i=n-1; i>=0; i--){
656     v   = aa + a->diag[i] + 1;
657     vi  = aj + a->diag[i] + 1;
658     nz  = ai[i+1] - a->diag[i] - 1;
659     sum = tmp[i];
660     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
661     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
662   }
663 
664   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
665   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
666   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
667   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
668   PetscLogFlops(2*a->nz - A->n);
669   PetscFunctionReturn(0);
670 }
671 
672 /* ----------------------------------------------------------- */
673 #undef __FUNCT__
674 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
675 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
676 {
677   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
678   PetscErrorCode ierr;
679   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
680   PetscScalar    *x,*b,*aa = a->a;
681 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
682   PetscInt       adiag_i,i,*vi,nz,ai_i;
683   PetscScalar    *v,sum;
684 #endif
685 
686   PetscFunctionBegin;
687   if (!n) PetscFunctionReturn(0);
688 
689   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
690   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
691 
692 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
693   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
694 #else
695   /* forward solve the lower triangular */
696   x[0] = b[0];
697   for (i=1; i<n; i++) {
698     ai_i = ai[i];
699     v    = aa + ai_i;
700     vi   = aj + ai_i;
701     nz   = adiag[i] - ai_i;
702     sum  = b[i];
703     while (nz--) sum -= *v++ * x[*vi++];
704     x[i] = sum;
705   }
706 
707   /* backward solve the upper triangular */
708   for (i=n-1; i>=0; i--){
709     adiag_i = adiag[i];
710     v       = aa + adiag_i + 1;
711     vi      = aj + adiag_i + 1;
712     nz      = ai[i+1] - adiag_i - 1;
713     sum     = x[i];
714     while (nz--) sum -= *v++ * x[*vi++];
715     x[i]    = sum*aa[adiag_i];
716   }
717 #endif
718   PetscLogFlops(2*a->nz - A->n);
719   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
720   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
721   PetscFunctionReturn(0);
722 }
723 
724 #undef __FUNCT__
725 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
726 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
727 {
728   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
729   IS             iscol = a->col,isrow = a->row;
730   PetscErrorCode ierr;
731   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
732   PetscInt       nz,*rout,*cout;
733   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
734 
735   PetscFunctionBegin;
736   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
737 
738   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
739   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
740   tmp  = a->solve_work;
741 
742   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
743   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
744 
745   /* forward solve the lower triangular */
746   tmp[0] = b[*r++];
747   for (i=1; i<n; i++) {
748     v   = aa + ai[i] ;
749     vi  = aj + ai[i] ;
750     nz  = a->diag[i] - ai[i];
751     sum = b[*r++];
752     while (nz--) sum -= *v++ * tmp[*vi++ ];
753     tmp[i] = sum;
754   }
755 
756   /* backward solve the upper triangular */
757   for (i=n-1; i>=0; i--){
758     v   = aa + a->diag[i] + 1;
759     vi  = aj + a->diag[i] + 1;
760     nz  = ai[i+1] - a->diag[i] - 1;
761     sum = tmp[i];
762     while (nz--) sum -= *v++ * tmp[*vi++ ];
763     tmp[i] = sum*aa[a->diag[i]];
764     x[*c--] += tmp[i];
765   }
766 
767   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
768   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
769   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
770   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
771   PetscLogFlops(2*a->nz);
772 
773   PetscFunctionReturn(0);
774 }
775 /* -------------------------------------------------------------------*/
776 #undef __FUNCT__
777 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
778 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
779 {
780   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
781   IS             iscol = a->col,isrow = a->row;
782   PetscErrorCode ierr;
783   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
784   PetscInt       nz,*rout,*cout,*diag = a->diag;
785   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
786 
787   PetscFunctionBegin;
788   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
789   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
790   tmp  = a->solve_work;
791 
792   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
793   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
794 
795   /* copy the b into temp work space according to permutation */
796   for (i=0; i<n; i++) tmp[i] = b[c[i]];
797 
798   /* forward solve the U^T */
799   for (i=0; i<n; i++) {
800     v   = aa + diag[i] ;
801     vi  = aj + diag[i] + 1;
802     nz  = ai[i+1] - diag[i] - 1;
803     s1  = tmp[i];
804     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
805     while (nz--) {
806       tmp[*vi++ ] -= (*v++)*s1;
807     }
808     tmp[i] = s1;
809   }
810 
811   /* backward solve the L^T */
812   for (i=n-1; i>=0; i--){
813     v   = aa + diag[i] - 1 ;
814     vi  = aj + diag[i] - 1 ;
815     nz  = diag[i] - ai[i];
816     s1  = tmp[i];
817     while (nz--) {
818       tmp[*vi-- ] -= (*v--)*s1;
819     }
820   }
821 
822   /* copy tmp into x according to permutation */
823   for (i=0; i<n; i++) x[r[i]] = tmp[i];
824 
825   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
826   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
827   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
828   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
829 
830   PetscLogFlops(2*a->nz-A->n);
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
836 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
837 {
838   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
839   IS             iscol = a->col,isrow = a->row;
840   PetscErrorCode ierr;
841   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
842   PetscInt       nz,*rout,*cout,*diag = a->diag;
843   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
844 
845   PetscFunctionBegin;
846   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
847 
848   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
849   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
850   tmp = a->solve_work;
851 
852   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
853   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
854 
855   /* copy the b into temp work space according to permutation */
856   for (i=0; i<n; i++) tmp[i] = b[c[i]];
857 
858   /* forward solve the U^T */
859   for (i=0; i<n; i++) {
860     v   = aa + diag[i] ;
861     vi  = aj + diag[i] + 1;
862     nz  = ai[i+1] - diag[i] - 1;
863     tmp[i] *= *v++;
864     while (nz--) {
865       tmp[*vi++ ] -= (*v++)*tmp[i];
866     }
867   }
868 
869   /* backward solve the L^T */
870   for (i=n-1; i>=0; i--){
871     v   = aa + diag[i] - 1 ;
872     vi  = aj + diag[i] - 1 ;
873     nz  = diag[i] - ai[i];
874     while (nz--) {
875       tmp[*vi-- ] -= (*v--)*tmp[i];
876     }
877   }
878 
879   /* copy tmp into x according to permutation */
880   for (i=0; i<n; i++) x[r[i]] += tmp[i];
881 
882   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
883   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
884   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
885   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
886 
887   PetscLogFlops(2*a->nz);
888   PetscFunctionReturn(0);
889 }
890 /* ----------------------------------------------------------------*/
891 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
895 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
896 {
897   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
898   IS             isicol;
899   PetscErrorCode ierr;
900   PetscInt       *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
901   PetscInt       *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
902   PetscInt       *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
903   PetscInt       incrlev,nnz,i,levels,diagonal_fill;
904   PetscTruth     col_identity,row_identity;
905   PetscReal      f;
906 
907   PetscFunctionBegin;
908   f             = info->fill;
909   levels        = (PetscInt)info->levels;
910   diagonal_fill = (PetscInt)info->diagonal_fill;
911   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
912 
913   /* special case that simply copies fill pattern */
914   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
915   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
916   if (!levels && row_identity && col_identity) {
917     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
918     (*fact)->factor = FACTOR_LU;
919     b               = (Mat_SeqAIJ*)(*fact)->data;
920     if (!b->diag) {
921       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
922     }
923     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
924     b->row              = isrow;
925     b->col              = iscol;
926     b->icol             = isicol;
927     /*
928     b->lu_damping       = info->damping;
929     b->lu_zeropivot     = info->zeropivot;
930     b->lu_shift         = info->shift;
931     b->lu_shift_fraction= info->shift_fraction;
932     */
933     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
934     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
935     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
936     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
937     PetscFunctionReturn(0);
938   }
939 
940   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
941   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
942 
943   /* get new row pointers */
944   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
945   ainew[0] = 0;
946   /* don't know how many column pointers are needed so estimate */
947   jmax = (PetscInt)(f*(ai[n]+1));
948   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
949   /* ajfill is level of fill for each fill entry */
950   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr);
951   /* fill is a linked list of nonzeros in active row */
952   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
953   /* im is level for each filled value */
954   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
955   /* dloc is location of diagonal in factor */
956   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr);
957   dloc[0]  = 0;
958   for (prow=0; prow<n; prow++) {
959 
960     /* copy current row into linked list */
961     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
962     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
963     xi      = aj + ai[r[prow]];
964     fill[n] = n;
965     fill[prow] = -1; /* marker to indicate if diagonal exists */
966     while (nz--) {
967       fm  = n;
968       idx = ic[*xi++ ];
969       do {
970         m  = fm;
971         fm = fill[m];
972       } while (fm < idx);
973       fill[m]   = idx;
974       fill[idx] = fm;
975       im[idx]   = 0;
976     }
977 
978     /* make sure diagonal entry is included */
979     if (diagonal_fill && fill[prow] == -1) {
980       fm = n;
981       while (fill[fm] < prow) fm = fill[fm];
982       fill[prow] = fill[fm]; /* insert diagonal into linked list */
983       fill[fm]   = prow;
984       im[prow]   = 0;
985       nzf++;
986       dcount++;
987     }
988 
989     nzi = 0;
990     row = fill[n];
991     while (row < prow) {
992       incrlev = im[row] + 1;
993       nz      = dloc[row];
994       xi      = ajnew  + ainew[row]  + nz + 1;
995       flev    = ajfill + ainew[row]  + nz + 1;
996       nnz     = ainew[row+1] - ainew[row] - nz - 1;
997       fm      = row;
998       while (nnz-- > 0) {
999         idx = *xi++ ;
1000         if (*flev + incrlev > levels) {
1001           flev++;
1002           continue;
1003         }
1004         do {
1005           m  = fm;
1006           fm = fill[m];
1007         } while (fm < idx);
1008         if (fm != idx) {
1009           im[idx]   = *flev + incrlev;
1010           fill[m]   = idx;
1011           fill[idx] = fm;
1012           fm        = idx;
1013           nzf++;
1014         } else {
1015           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
1016         }
1017         flev++;
1018       }
1019       row = fill[row];
1020       nzi++;
1021     }
1022     /* copy new filled row into permanent storage */
1023     ainew[prow+1] = ainew[prow] + nzf;
1024     if (ainew[prow+1] > jmax) {
1025 
1026       /* estimate how much additional space we will need */
1027       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1028       /* just double the memory each time */
1029       /*  maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1030       PetscInt maxadd = jmax;
1031       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1032       jmax += maxadd;
1033 
1034       /* allocate a longer ajnew and ajfill */
1035       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1036       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1037       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1038       ajnew  = xi;
1039       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1040       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1041       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1042       ajfill = xi;
1043       reallocs++; /* count how many times we realloc */
1044     }
1045     xi          = ajnew + ainew[prow] ;
1046     flev        = ajfill + ainew[prow] ;
1047     dloc[prow]  = nzi;
1048     fm          = fill[n];
1049     while (nzf--) {
1050       *xi++   = fm ;
1051       *flev++ = im[fm];
1052       fm      = fill[fm];
1053     }
1054     /* make sure row has diagonal entry */
1055     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1056       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1057     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1058     }
1059   }
1060   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1061   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1062   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1063   ierr = PetscFree(fill);CHKERRQ(ierr);
1064   ierr = PetscFree(im);CHKERRQ(ierr);
1065 
1066   {
1067     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1068     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1069     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1070     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1071     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1072     if (diagonal_fill) {
1073       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1074     }
1075   }
1076 
1077   /* put together the new matrix */
1078   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1079   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1080   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1081   PetscLogObjectParent(*fact,isicol);
1082   b = (Mat_SeqAIJ*)(*fact)->data;
1083   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1084   b->singlemalloc = PETSC_FALSE;
1085   len = (ainew[n] )*sizeof(PetscScalar);
1086   /* the next line frees the default space generated by the Create() */
1087   ierr = PetscFree(b->a);CHKERRQ(ierr);
1088   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1089   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1090   b->j          = ajnew;
1091   b->i          = ainew;
1092   for (i=0; i<n; i++) dloc[i] += ainew[i];
1093   b->diag       = dloc;
1094   b->ilen       = 0;
1095   b->imax       = 0;
1096   b->row        = isrow;
1097   b->col        = iscol;
1098   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1099   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1100   b->icol       = isicol;
1101   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1102   /* In b structure:  Free imax, ilen, old a, old j.
1103      Allocate dloc, solve_work, new a, new j */
1104   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1105   b->maxnz             = b->nz = ainew[n] ;
1106   /*
1107   b->lu_damping        = info->damping;
1108   b->lu_shift          = info->shift;
1109   b->lu_shift_fraction = info->shift_fraction;
1110   b->lu_zeropivot = info->zeropivot;
1111   */
1112   (*fact)->factor = FACTOR_LU;
1113   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1114   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1115 
1116   (*fact)->info.factor_mallocs    = reallocs;
1117   (*fact)->info.fill_ratio_given  = f;
1118   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #include "src/mat/impls/sbaij/seq/sbaij.h"
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1125 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1126 {
1127   Mat            C = *B;
1128   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1129   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1130   IS             ip=b->row;
1131   PetscErrorCode ierr;
1132   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1133   PetscInt       *ai=a->i,*aj=a->j;
1134   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1135   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1136   PetscReal      zeropivot=b->factor_zeropivot,shift_amount,rs;
1137   PetscTruth     chshift;
1138   PetscInt       nshift=0;
1139   PetscReal      shift_pd=b->factor_shift,shift_nonzero=b->factor_damping;
1140 
1141   PetscFunctionBegin;
1142   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1143 
1144   /* initialization */
1145   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1146   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1147   jl   = il + mbs;
1148   rtmp = (MatScalar*)(jl + mbs);
1149 
1150   shift_amount = 0;
1151   do {
1152     chshift = PETSC_FALSE;
1153     for (i=0; i<mbs; i++) {
1154       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1155     }
1156 
1157     for (k = 0; k<mbs; k++){
1158       bval = ba + bi[k];
1159       /* initialize k-th row by the perm[k]-th row of A */
1160       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1161       for (j = jmin; j < jmax; j++){
1162         col = rip[aj[j]];
1163         if (col >= k){ /* only take upper triangular entry */
1164           rtmp[col] = aa[j];
1165           *bval++  = 0.0; /* for in-place factorization */
1166         }
1167       }
1168       /* shift the diagonal of the matrix */
1169       if (nshift) rtmp[k] += shift_amount;
1170 
1171       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1172       dk = rtmp[k];
1173       i = jl[k]; /* first row to be added to k_th row  */
1174 
1175       while (i < k){
1176         nexti = jl[i]; /* next row to be added to k_th row */
1177 
1178         /* compute multiplier, update diag(k) and U(i,k) */
1179         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1180         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1181         dk += uikdi*ba[ili];
1182         ba[ili] = uikdi; /* -U(i,k) */
1183 
1184         /* add multiple of row i to k-th row */
1185         jmin = ili + 1; jmax = bi[i+1];
1186         if (jmin < jmax){
1187           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1188           /* update il and jl for row i */
1189           il[i] = jmin;
1190           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1191         }
1192         i = nexti;
1193       }
1194 
1195       /* shift the diagonals when zero pivot is detected */
1196       /* compute rs=sum of abs(off-diagonal) */
1197       rs   = 0.0;
1198       jmin = bi[k]+1;
1199       nz   = bi[k+1] - jmin;
1200       if (nz){
1201         bcol = bj + jmin;
1202         while (nz--){
1203           rs += PetscAbsScalar(rtmp[*bcol++]);
1204         }
1205       }
1206       if (PetscAbsScalar(dk) <= zeropivot*rs && shift_nonzero){
1207         /* force |diag(*B)| > zeropivot*rs */
1208         if (!nshift){
1209           shift_amount = shift_nonzero;
1210         } else {
1211           shift_amount *= 2.0;
1212         }
1213         chshift = PETSC_TRUE;
1214         nshift++;
1215         break;
1216       } else if (PetscRealPart(dk) <= zeropivot*rs && shift_pd){
1217         /* calculate a shift that would make this row diagonally dominant */
1218 	shift_amount = PetscMax(rs+PetscAbs(PetscRealPart(dk)),1.1*shift_amount);
1219 	chshift      = PETSC_TRUE;
1220 	/* Unlike in the ILU case there is no exit condition on nshift:
1221 	   we increase the shift until it converges. There is no guarantee that
1222 	   this algorithm converges faster or slower, or is better or worse
1223 	   than the ILU algorithm. */
1224 	nshift++;
1225 	break;
1226       } else if (PetscAbsScalar(dk) <= zeropivot*rs){
1227         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1228       }
1229 
1230       /* copy data into U(k,:) */
1231       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1232       jmin = bi[k]+1; jmax = bi[k+1];
1233       if (jmin < jmax) {
1234         for (j=jmin; j<jmax; j++){
1235           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1236         }
1237         /* add the k-th row into il and jl */
1238         il[k] = jmin;
1239         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1240       }
1241     }
1242   } while (chshift);
1243   ierr = PetscFree(il);CHKERRQ(ierr);
1244 
1245   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1246   C->factor       = FACTOR_CHOLESKY;
1247   C->assembled    = PETSC_TRUE;
1248   C->preallocated = PETSC_TRUE;
1249   PetscLogFlops(C->m);
1250   if (nshift){
1251     if (shift_nonzero) {
1252       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount);
1253     } else if (shift_pd) {
1254       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g\n",nshift,shift_amount);
1255     }
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #include "petscbt.h"
1261 #include "src/mat/utils/freespace.h"
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1264 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1265 {
1266   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1267   Mat_SeqSBAIJ   *b;
1268   Mat            B;
1269   PetscErrorCode ierr;
1270   PetscTruth     perm_identity;
1271   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1272   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1273   PetscInt       nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1274   PetscReal      fill=info->fill,levels=info->levels;
1275   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1276   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1277   PetscBT        lnkbt;
1278 
1279   PetscFunctionBegin;
1280   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1281   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1282 
1283   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1284   ui[0] = 0;
1285 
1286   /* special case that simply copies fill pattern */
1287   if (!levels && perm_identity) {
1288     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1289     for (i=0; i<am; i++) {
1290       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1291     }
1292     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1293     cols = uj;
1294     for (i=0; i<am; i++) {
1295       aj    = a->j + a->diag[i];
1296       ncols = ui[i+1] - ui[i];
1297       for (j=0; j<ncols; j++) *cols++ = *aj++;
1298     }
1299   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1300     /* initialization */
1301     ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
1302 
1303     /* jl: linked list for storing indices of the pivot rows
1304        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1305     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1306     il         = jl + am;
1307     uj_ptr     = (PetscInt**)(il + am);
1308     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1309     for (i=0; i<am; i++){
1310       jl[i] = am; il[i] = 0;
1311     }
1312 
1313     /* create and initialize a linked list for storing column indices of the active row k */
1314     nlnk = am + 1;
1315     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1316 
1317     /* initial FreeSpace size is fill*(ai[am]+1) */
1318     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1319     current_space = free_space;
1320     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1321     current_space_lvl = free_space_lvl;
1322 
1323     for (k=0; k<am; k++){  /* for each active row k */
1324       /* initialize lnk by the column indices of row rip[k] of A */
1325       nzk   = 0;
1326       ncols = ai[rip[k]+1] - ai[rip[k]];
1327       ncols_upper = 0;
1328       cols        = cols_lvl + am;
1329       for (j=0; j<ncols; j++){
1330         i = rip[*(aj + ai[rip[k]] + j)];
1331         if (i >= k){ /* only take upper triangular entry */
1332           cols[ncols_upper] = i;
1333           cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
1334           ncols_upper++;
1335         }
1336       }
1337       ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1338       nzk += nlnk;
1339 
1340       /* update lnk by computing fill-in for each pivot row to be merged in */
1341       prow = jl[k]; /* 1st pivot row */
1342 
1343       while (prow < k){
1344         nextprow = jl[prow];
1345 
1346         /* merge prow into k-th row */
1347         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1348         jmax = ui[prow+1];
1349         ncols = jmax-jmin;
1350         i     = jmin - ui[prow];
1351         cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1352         for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1353         ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1354         nzk += nlnk;
1355 
1356         /* update il and jl for prow */
1357         if (jmin < jmax){
1358           il[prow] = jmin;
1359           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1360         }
1361         prow = nextprow;
1362       }
1363 
1364       /* if free space is not available, make more free space */
1365       if (current_space->local_remaining<nzk) {
1366         i = am - k + 1; /* num of unfactored rows */
1367         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1368         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1369         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1370         reallocs++;
1371       }
1372 
1373       /* copy data into free_space and free_space_lvl, then initialize lnk */
1374       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1375 
1376       /* add the k-th row into il and jl */
1377       if (nzk > 1){
1378         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1379         jl[k] = jl[i]; jl[i] = k;
1380         il[k] = ui[k] + 1;
1381       }
1382       uj_ptr[k]     = current_space->array;
1383       uj_lvl_ptr[k] = current_space_lvl->array;
1384 
1385       current_space->array           += nzk;
1386       current_space->local_used      += nzk;
1387       current_space->local_remaining -= nzk;
1388 
1389       current_space_lvl->array           += nzk;
1390       current_space_lvl->local_used      += nzk;
1391       current_space_lvl->local_remaining -= nzk;
1392 
1393       ui[k+1] = ui[k] + nzk;
1394     }
1395 
1396     if (ai[am] != 0) {
1397       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1398       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1399       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1400       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1401     } else {
1402       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1403     }
1404 
1405     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1406     ierr = PetscFree(jl);CHKERRQ(ierr);
1407     ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
1408 
1409     /* destroy list of free space and other temporary array(s) */
1410     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1411     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1412     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1413     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1414 
1415   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1416 
1417   /* put together the new matrix in MATSEQSBAIJ format */
1418   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1419   B = *fact;
1420   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1421   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1422 
1423   b    = (Mat_SeqSBAIJ*)B->data;
1424   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1425   b->singlemalloc = PETSC_FALSE;
1426   /* the next line frees the default space generated by the Create() */
1427   ierr = PetscFree(b->a);CHKERRQ(ierr);
1428   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1429   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1430   b->j    = uj;
1431   b->i    = ui;
1432   b->diag = 0;
1433   b->ilen = 0;
1434   b->imax = 0;
1435   b->row  = perm;
1436   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1437   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1438   b->icol = perm;
1439   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1440   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1441   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1442   b->maxnz = b->nz = ui[am];
1443 
1444   B->factor                 = FACTOR_CHOLESKY;
1445   B->info.factor_mallocs    = reallocs;
1446   B->info.fill_ratio_given  = fill;
1447   if (ai[am] != 0) {
1448     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1449   } else {
1450     B->info.fill_ratio_needed = 0.0;
1451   }
1452   /*
1453   b->factor_zeropivot      = info->zeropivot;
1454   b->factor_damping        = info->damping;
1455   b->factor_shift          = info->shift;
1456   */
1457   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1458   if (perm_identity){
1459     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1460     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1461   }
1462   PetscFunctionReturn(0);
1463 }
1464 
1465 #undef __FUNCT__
1466 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1467 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1468 {
1469   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1470   Mat_SeqSBAIJ   *b;
1471   Mat            B;
1472   PetscErrorCode ierr;
1473   PetscTruth     perm_identity;
1474   PetscReal      fill = info->fill;
1475   PetscInt       *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1476   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1477   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1478   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1479   PetscBT        lnkbt;
1480   IS             iperm;
1481 
1482   PetscFunctionBegin;
1483   /* check whether perm is the identity mapping */
1484   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1485   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1486 
1487   if (!perm_identity){
1488     /* check if perm is symmetric! */
1489     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1490     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1491     for (i=0; i<mbs; i++) {
1492       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1493     }
1494     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1495     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1496   }
1497 
1498   /* initialization */
1499   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1500   ui[0] = 0;
1501 
1502   /* jl: linked list for storing indices of the pivot rows
1503      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
1504   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1505   il     = jl + mbs;
1506   cols   = il + mbs;
1507   ui_ptr = (PetscInt**)(cols + mbs);
1508   for (i=0; i<mbs; i++){
1509     jl[i] = mbs; il[i] = 0;
1510   }
1511 
1512   /* create and initialize a linked list for storing column indices of the active row k */
1513   nlnk = mbs + 1;
1514   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1515 
1516   /* initial FreeSpace size is fill*(ai[mbs]+1) */
1517   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
1518   current_space = free_space;
1519 
1520   for (k=0; k<mbs; k++){  /* for each active row k */
1521     /* initialize lnk by the column indices of row rip[k] of A */
1522     nzk   = 0;
1523     ncols = ai[rip[k]+1] - ai[rip[k]];
1524     ncols_upper = 0;
1525     for (j=0; j<ncols; j++){
1526       i = rip[*(aj + ai[rip[k]] + j)];
1527       if (i >= k){ /* only take upper triangular entry */
1528         cols[ncols_upper] = i;
1529         ncols_upper++;
1530       }
1531     }
1532     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1533     nzk += nlnk;
1534 
1535     /* update lnk by computing fill-in for each pivot row to be merged in */
1536     prow = jl[k]; /* 1st pivot row */
1537 
1538     while (prow < k){
1539       nextprow = jl[prow];
1540       /* merge prow into k-th row */
1541       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
1542       jmax = ui[prow+1];
1543       ncols = jmax-jmin;
1544       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
1545       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1546       nzk += nlnk;
1547 
1548       /* update il and jl for prow */
1549       if (jmin < jmax){
1550         il[prow] = jmin;
1551         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1552       }
1553       prow = nextprow;
1554     }
1555 
1556     /* if free space is not available, make more free space */
1557     if (current_space->local_remaining<nzk) {
1558       i = mbs - k + 1; /* num of unfactored rows */
1559       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1560       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1561       reallocs++;
1562     }
1563 
1564     /* copy data into free space, then initialize lnk */
1565     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1566 
1567     /* add the k-th row into il and jl */
1568     if (nzk-1 > 0){
1569       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1570       jl[k] = jl[i]; jl[i] = k;
1571       il[k] = ui[k] + 1;
1572     }
1573     ui_ptr[k] = current_space->array;
1574     current_space->array           += nzk;
1575     current_space->local_used      += nzk;
1576     current_space->local_remaining -= nzk;
1577 
1578     ui[k+1] = ui[k] + nzk;
1579   }
1580 
1581   if (ai[mbs] != 0) {
1582     PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]);
1583     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1584     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1585     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1586   } else {
1587      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1588   }
1589 
1590   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1591   ierr = PetscFree(jl);CHKERRQ(ierr);
1592 
1593   /* destroy list of free space and other temporary array(s) */
1594   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1595   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1596   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1597 
1598   /* put together the new matrix in MATSEQSBAIJ format */
1599   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
1600   B    = *fact;
1601   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1602   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1603 
1604   b = (Mat_SeqSBAIJ*)B->data;
1605   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1606   b->singlemalloc = PETSC_FALSE;
1607   /* the next line frees the default space generated by the Create() */
1608   ierr = PetscFree(b->a);CHKERRQ(ierr);
1609   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1610   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1611   b->j    = uj;
1612   b->i    = ui;
1613   b->diag = 0;
1614   b->ilen = 0;
1615   b->imax = 0;
1616   b->row  = perm;
1617   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1618   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1619   b->icol = perm;
1620   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1621   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1622   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
1623   b->maxnz = b->nz = ui[mbs];
1624 
1625   B->factor                 = FACTOR_CHOLESKY;
1626   B->info.factor_mallocs    = reallocs;
1627   B->info.fill_ratio_given  = fill;
1628   if (ai[mbs] != 0) {
1629     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
1630   } else {
1631     B->info.fill_ratio_needed = 0.0;
1632   }
1633   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1634   if (perm_identity){
1635     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1636     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1637   }
1638   PetscFunctionReturn(0);
1639 }
1640