xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 0697b6cae9af508b3938b6fbd8a1890c7adeb4de)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9 {
10   PetscFunctionBegin;
11 
12   SETERRQ(PETSC_ERR_SUP,"Code not written");
13 #if !defined(PETSC_USE_DEBUG)
14   PetscFunctionReturn(0);
15 #endif
16 }
17 
18 
19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
21 
22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
28   /* ------------------------------------------------------------
29 
30           This interface was contribed by Tony Caola
31 
32      This routine is an interface to the pivoting drop-tolerance
33      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34      SPARSEKIT2.
35 
36      The SPARSEKIT2 routines used here are covered by the GNU
37      copyright; see the file gnu in this directory.
38 
39      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40      help in getting this routine ironed out.
41 
42      The major drawback to this routine is that if info->fill is
43      not large enough it fails rather than allocating more space;
44      this can be fixed by hacking/improving the f2c version of
45      Yousef Saad's code.
46 
47      ------------------------------------------------------------
48 */
49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50 {
51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
52   PetscFunctionBegin;
53   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
54   You can obtain the drop tolerance routines by installing PETSc from\n\
55   www.mcs.anl.gov/petsc\n");
56 #else
57   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
58   IS             iscolf,isicol,isirow;
59   PetscTruth     reorder;
60   PetscErrorCode ierr,sierr;
61   PetscInt       *c,*r,*ic,i,n = A->m;
62   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63   PetscInt       *ordcol,*iwk,*iperm,*jw;
64   PetscInt       jmax,lfill,job,*o_i,*o_j;
65   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66   PetscReal      af;
67 
68   PetscFunctionBegin;
69 
70   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
71   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
72   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
73   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
74   lfill   = (PetscInt)(info->dtcount/2.0);
75   jmax    = (PetscInt)(info->fill*a->nz);
76 
77 
78   /* ------------------------------------------------------------
79      If reorder=.TRUE., then the original matrix has to be
80      reordered to reflect the user selected ordering scheme, and
81      then de-reordered so it is in it's original format.
82      Because Saad's dperm() is NOT in place, we have to copy
83      the original matrix and allocate more storage. . .
84      ------------------------------------------------------------
85   */
86 
87   /* set reorder to true if either isrow or iscol is not identity */
88   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
89   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
90   reorder = PetscNot(reorder);
91 
92 
93   /* storage for ilu factor */
94   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
95   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
97   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
98 
99   /* ------------------------------------------------------------
100      Make sure that everything is Fortran formatted (1-Based)
101      ------------------------------------------------------------
102   */
103   for (i=old_i[0];i<old_i[n];i++) {
104     old_j[i]++;
105   }
106   for(i=0;i<n+1;i++) {
107     old_i[i]++;
108   };
109 
110 
111   if (reorder) {
112     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
113     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
114     for(i=0;i<n;i++) {
115       r[i]  = r[i]+1;
116       c[i]  = c[i]+1;
117     }
118     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
119     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
121     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
122     for (i=0;i<n;i++) {
123       r[i]  = r[i]-1;
124       c[i]  = c[i]-1;
125     }
126     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
127     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
128     o_a = old_a2;
129     o_j = old_j2;
130     o_i = old_i2;
131   } else {
132     o_a = old_a;
133     o_j = old_j;
134     o_i = old_i;
135   }
136 
137   /* ------------------------------------------------------------
138      Call Saad's ilutp() routine to generate the factorization
139      ------------------------------------------------------------
140   */
141 
142   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
144   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
145 
146   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
147   if (sierr) {
148     switch (sierr) {
149       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
150       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
152       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
153       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
154       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
155     }
156   }
157 
158   ierr = PetscFree(w);CHKERRQ(ierr);
159   ierr = PetscFree(jw);CHKERRQ(ierr);
160 
161   /* ------------------------------------------------------------
162      Saad's routine gives the result in Modified Sparse Row (msr)
163      Convert to Compressed Sparse Row format (csr)
164      ------------------------------------------------------------
165   */
166 
167   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
168   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
169 
170   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
171 
172   ierr = PetscFree(iwk);CHKERRQ(ierr);
173   ierr = PetscFree(wk);CHKERRQ(ierr);
174 
175   if (reorder) {
176     ierr = PetscFree(old_a2);CHKERRQ(ierr);
177     ierr = PetscFree(old_j2);CHKERRQ(ierr);
178     ierr = PetscFree(old_i2);CHKERRQ(ierr);
179   } else {
180     /* fix permutation of old_j that the factorization introduced */
181     for (i=old_i[0]; i<old_i[n]; i++) {
182       old_j[i-1] = iperm[old_j[i-1]-1];
183     }
184   }
185 
186   /* get rid of the shift to indices starting at 1 */
187   for (i=0; i<n+1; i++) {
188     old_i[i]--;
189   }
190   for (i=old_i[0];i<old_i[n];i++) {
191     old_j[i]--;
192   }
193 
194   /* Make the factored matrix 0-based */
195   for (i=0; i<n+1; i++) {
196     new_i[i]--;
197   }
198   for (i=new_i[0];i<new_i[n];i++) {
199     new_j[i]--;
200   }
201 
202   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203   /*-- permute the right-hand-side and solution vectors           --*/
204   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
205   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
206   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
207   for(i=0; i<n; i++) {
208     ordcol[i] = ic[iperm[i]-1];
209   };
210   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
211   ierr = ISDestroy(isicol);CHKERRQ(ierr);
212 
213   ierr = PetscFree(iperm);CHKERRQ(ierr);
214 
215   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
216   ierr = PetscFree(ordcol);CHKERRQ(ierr);
217 
218   /*----- put together the new matrix -----*/
219 
220   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
221   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
222   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
223   (*fact)->factor    = FACTOR_LU;
224   (*fact)->assembled = PETSC_TRUE;
225 
226   b = (Mat_SeqAIJ*)(*fact)->data;
227   ierr = PetscFree(b->imax);CHKERRQ(ierr);
228   b->sorted        = PETSC_FALSE;
229   b->singlemalloc  = PETSC_FALSE;
230   /* the next line frees the default space generated by the MatCreate() */
231   ierr             = PetscFree(b->a);CHKERRQ(ierr);
232   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
247 
248   /* check out for identical nodes. If found, use inode functions */
249   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
250 
251   af = ((double)b->nz)/((double)a->nz) + .001;
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 /*
262     Factorization code for AIJ format.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267 {
268   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
269   IS             isicol;
270   PetscErrorCode ierr;
271   PetscInt       *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
272   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
274   PetscReal      f;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
284   ainew[0] = 0;
285   /* don't know how many column pointers are needed so estimate */
286   f = info->fill;
287   jmax  = (PetscInt)(f*ai[n]+1);
288   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
289   /* fill is a linked list of nonzeros in active row */
290   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
291   im   = fill + n + 1;
292   /* idnew is location of diagonal in factor */
293   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
294   idnew[0] = 0;
295 
296   for (i=0; i<n; i++) {
297     /* first copy previous fill into linked list */
298     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
299     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300     ajtmp   = aj + ai[r[i]];
301     fill[n] = n;
302     while (nz--) {
303       fm  = n;
304       idx = ic[*ajtmp++];
305       do {
306         m  = fm;
307         fm = fill[m];
308       } while (fm < idx);
309       fill[m]   = idx;
310       fill[idx] = fm;
311     }
312     row = fill[n];
313     while (row < i) {
314       ajtmp = ajnew + idnew[row] + 1;
315       nzbd  = 1 + idnew[row] - ainew[row];
316       nz    = im[row] - nzbd;
317       fm    = row;
318       while (nz-- > 0) {
319         idx = *ajtmp++ ;
320         nzbd++;
321         if (idx == i) im[row] = nzbd;
322         do {
323           m  = fm;
324           fm = fill[m];
325         } while (fm < idx);
326         if (fm != idx) {
327           fill[m]   = idx;
328           fill[idx] = fm;
329           fm        = idx;
330           nnz++;
331         }
332       }
333       row = fill[row];
334     }
335     /* copy new filled row into permanent storage */
336     ainew[i+1] = ainew[i] + nnz;
337     if (ainew[i+1] > jmax) {
338 
339       /* estimate how much additional space we will need */
340       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
341       /* just double the memory each time */
342       PetscInt maxadd = jmax;
343       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
344       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
345       jmax += maxadd;
346 
347       /* allocate a longer ajnew */
348       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
349       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr);
350       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
351       ajnew = ajtmp;
352       reallocs++; /* count how many times we realloc */
353     }
354     ajtmp = ajnew + ainew[i];
355     fm    = fill[n];
356     nzi   = 0;
357     im[i] = nnz;
358     while (nnz--) {
359       if (fm < i) nzi++;
360       *ajtmp++ = fm ;
361       fm       = fill[fm];
362     }
363     idnew[i] = ainew[i] + nzi;
364   }
365   if (ai[n] != 0) {
366     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
367     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
368     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
369     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
370     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
371   } else {
372     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
373   }
374 
375   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
376   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
377 
378   ierr = PetscFree(fill);CHKERRQ(ierr);
379 
380   /* put together the new matrix */
381   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
382   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
383   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
384   PetscLogObjectParent(*B,isicol);
385   b    = (Mat_SeqAIJ*)(*B)->data;
386   ierr = PetscFree(b->imax);CHKERRQ(ierr);
387   b->singlemalloc = PETSC_FALSE;
388   /* the next line frees the default space generated by the Create() */
389   ierr = PetscFree(b->a);CHKERRQ(ierr);
390   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
391   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
392   b->j          = ajnew;
393   b->i          = ainew;
394   b->diag       = idnew;
395   b->ilen       = 0;
396   b->imax       = 0;
397   b->row        = isrow;
398   b->col        = iscol;
399   b->lu_damping        = info->damping;
400   b->lu_zeropivot      = info->zeropivot;
401   b->lu_shift          = info->shift;
402   b->lu_shift_fraction = info->shift_fraction;
403   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
404   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
405   b->icol       = isicol;
406   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
407   /* In b structure:  Free imax, ilen, old a, old j.
408      Allocate idnew, solve_work, new a, new j */
409   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
410   b->maxnz = b->nz = ainew[n] ;
411 
412   (*B)->factor                 =  FACTOR_LU;
413   (*B)->info.factor_mallocs    = reallocs;
414   (*B)->info.fill_ratio_given  = f;
415   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
416   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
417 
418   if (ai[n] != 0) {
419     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
420   } else {
421     (*B)->info.fill_ratio_needed = 0.0;
422   }
423   PetscFunctionReturn(0);
424 }
425 /* ----------------------------------------------------------- */
426 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
430 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
431 {
432   Mat            C=*B;
433   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
434   IS             isrow = b->row,isicol = b->icol;
435   PetscErrorCode ierr;
436   PetscInt       *r,*ic,i,j,n=A->m,*ai=b->i,*aj=b->j;
437   PetscInt       *ajtmpold,*ajtmp,nz,row,*ics;
438   PetscInt       *diag_offset = b->diag,diag,*pj,nshift=0;
439   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
440   PetscReal      zeropivot=b->lu_zeropivot,rs,d;
441   PetscReal      row_shift,shift_fraction,shift_amount,shift_lo=0.,shift_hi=1.,shift_top=0.;
442   PetscTruth     lushift;
443   PetscReal      shift_pd=b->lu_shift,shift_nonzero=b->lu_damping;
444 
445   PetscFunctionBegin;
446   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
447   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
448   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
449   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
450   rtmps = rtmp; ics = ic;
451 
452   if (!a->diag) {
453     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
454   }
455   /* if both shift schemes are chosen by user, only use shift_pd */
456   if (shift_pd && shift_nonzero) shift_nonzero = 0.0;
457   if (shift_pd) { /* set shift_top=max{row_shift} */
458     PetscInt *aai = a->i,*ddiag = a->diag;
459     shift_top = 0;
460     for (i=0; i<n; i++) {
461       d = PetscAbsScalar((a->a)[ddiag[i]]);
462       /* calculate amt of shift needed for this row */
463       if (d<=0) {
464         row_shift = 0;
465       } else {
466         row_shift = -2*d;
467       }
468       v  = a->a+aai[i];
469       nz = aai[i+1] - aai[i];
470       for (j=0; j<nz; j++)
471 	row_shift += PetscAbsScalar(v[j]);
472       if (row_shift>shift_top) shift_top = row_shift;
473     }
474   }
475 
476   shift_fraction = b->lu_shift_fraction;
477   shift_amount   = 0;
478   do {
479     lushift = PETSC_FALSE;
480     for (i=0; i<n; i++){
481       nz    = ai[i+1] - ai[i];
482       ajtmp = aj + ai[i];
483       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
484 
485       /* load in initial (unfactored row) */
486       nz       = a->i[r[i]+1] - a->i[r[i]];
487       ajtmpold = a->j + a->i[r[i]];
488       v        = a->a + a->i[r[i]];
489       for (j=0; j<nz; j++) {
490         rtmp[ics[ajtmpold[j]]] = v[j];
491       }
492       rtmp[ics[r[i]]] += shift_amount; /* shift the diagonal of the matrix */
493 
494       row = *ajtmp++;
495       while  (row < i) {
496         pc = rtmp + row;
497         if (*pc != 0.0) {
498           pv         = b->a + diag_offset[row];
499           pj         = b->j + diag_offset[row] + 1;
500           multiplier = *pc / *pv++;
501           *pc        = multiplier;
502           nz         = ai[row+1] - diag_offset[row] - 1;
503           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
504           PetscLogFlops(2*nz);
505         }
506         row = *ajtmp++;
507       }
508       /* finished row so stick it into b->a */
509       pv   = b->a + ai[i] ;
510       pj   = b->j + ai[i] ;
511       nz   = ai[i+1] - ai[i];
512       diag = diag_offset[i] - ai[i];
513       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
514       rs   = 0.0;
515       for (j=0; j<nz; j++) {
516         pv[j] = rtmps[pj[j]];
517         if (j != diag) rs += PetscAbsScalar(pv[j]);
518       }
519       /* shift the diagonals when zero pivot is detected */
520 #define MAX_NSHIFT 5
521       if (PetscAbsScalar(pv[diag]) <= zeropivot*rs && shift_nonzero){
522         /* force |diag(*B)| > zeropivot*rs */
523         if (!nshift){
524           shift_amount = shift_nonzero;
525         } else {
526           shift_amount *= 2.0;
527         }
528         lushift = PETSC_TRUE;
529         nshift++;
530         break;
531       } else if (PetscRealPart(pv[diag]) <= zeropivot*rs && shift_pd){
532         /* force *B to be diagonally dominant */
533 	if (nshift>MAX_NSHIFT) {
534 	  SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner");
535 	} else if (nshift==MAX_NSHIFT) {
536 	  shift_fraction = shift_hi;
537 	  lushift        = PETSC_FALSE;
538 	} else {
539 	  shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.;
540 	  lushift  = PETSC_TRUE;
541 	}
542 	shift_amount = shift_fraction * shift_top;
543         nshift++;
544         break;
545       } else if (PetscAbsScalar(pv[diag]) <= zeropivot*rs){
546         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
547       }
548     }
549     if (shift_pd && !lushift && shift_fraction>0 && nshift<MAX_NSHIFT) {
550       /*
551        * if no shift in this attempt & shifting & started shifting & can refine,
552        * then try lower shift
553        */
554       shift_hi       = shift_fraction;
555       shift_fraction = (shift_hi+shift_lo)/2.;
556       shift_amount   = shift_fraction * shift_top;
557       lushift        = PETSC_TRUE;
558       nshift++;
559     }
560   } while (lushift);
561 
562   /* invert diagonal entries for simplier triangular solves */
563   for (i=0; i<n; i++) {
564     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
565   }
566 
567   ierr = PetscFree(rtmp);CHKERRQ(ierr);
568   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
569   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
570   C->factor = FACTOR_LU;
571   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
572   C->assembled = PETSC_TRUE;
573   PetscLogFlops(C->n);
574   if (nshift){
575     if (shift_nonzero) {
576       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D, shift_ amount%g\n",nshift,shift_amount);
577     } else if (shift_pd) {
578       b->lu_shift_fraction = shift_fraction;
579       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift);
580     }
581   }
582   PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
587 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
588 {
589   PetscFunctionBegin;
590   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
591   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
592   PetscFunctionReturn(0);
593 }
594 
595 
596 /* ----------------------------------------------------------- */
597 #undef __FUNCT__
598 #define __FUNCT__ "MatLUFactor_SeqAIJ"
599 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
600 {
601   PetscErrorCode ierr;
602   Mat            C;
603 
604   PetscFunctionBegin;
605   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
606   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
607   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
608   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
609   PetscFunctionReturn(0);
610 }
611 /* ----------------------------------------------------------- */
612 #undef __FUNCT__
613 #define __FUNCT__ "MatSolve_SeqAIJ"
614 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
615 {
616   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
617   IS             iscol = a->col,isrow = a->row;
618   PetscErrorCode ierr;
619   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
620   PetscInt       nz,*rout,*cout;
621   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
622 
623   PetscFunctionBegin;
624   if (!n) PetscFunctionReturn(0);
625 
626   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
627   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
628   tmp  = a->solve_work;
629 
630   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
631   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
632 
633   /* forward solve the lower triangular */
634   tmp[0] = b[*r++];
635   tmps   = tmp;
636   for (i=1; i<n; i++) {
637     v   = aa + ai[i] ;
638     vi  = aj + ai[i] ;
639     nz  = a->diag[i] - ai[i];
640     sum = b[*r++];
641     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
642     tmp[i] = sum;
643   }
644 
645   /* backward solve the upper triangular */
646   for (i=n-1; i>=0; i--){
647     v   = aa + a->diag[i] + 1;
648     vi  = aj + a->diag[i] + 1;
649     nz  = ai[i+1] - a->diag[i] - 1;
650     sum = tmp[i];
651     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
652     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
653   }
654 
655   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
656   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
657   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
658   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
659   PetscLogFlops(2*a->nz - A->n);
660   PetscFunctionReturn(0);
661 }
662 
663 /* ----------------------------------------------------------- */
664 #undef __FUNCT__
665 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
666 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
667 {
668   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
669   PetscErrorCode ierr;
670   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
671   PetscScalar    *x,*b,*aa = a->a;
672 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
673   PetscInt       adiag_i,i,*vi,nz,ai_i;
674   PetscScalar    *v,sum;
675 #endif
676 
677   PetscFunctionBegin;
678   if (!n) PetscFunctionReturn(0);
679 
680   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
681   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
682 
683 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
684   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
685 #else
686   /* forward solve the lower triangular */
687   x[0] = b[0];
688   for (i=1; i<n; i++) {
689     ai_i = ai[i];
690     v    = aa + ai_i;
691     vi   = aj + ai_i;
692     nz   = adiag[i] - ai_i;
693     sum  = b[i];
694     while (nz--) sum -= *v++ * x[*vi++];
695     x[i] = sum;
696   }
697 
698   /* backward solve the upper triangular */
699   for (i=n-1; i>=0; i--){
700     adiag_i = adiag[i];
701     v       = aa + adiag_i + 1;
702     vi      = aj + adiag_i + 1;
703     nz      = ai[i+1] - adiag_i - 1;
704     sum     = x[i];
705     while (nz--) sum -= *v++ * x[*vi++];
706     x[i]    = sum*aa[adiag_i];
707   }
708 #endif
709   PetscLogFlops(2*a->nz - A->n);
710   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
711   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
717 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
718 {
719   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
720   IS             iscol = a->col,isrow = a->row;
721   PetscErrorCode ierr;
722   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
723   PetscInt       nz,*rout,*cout;
724   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
725 
726   PetscFunctionBegin;
727   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
728 
729   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
730   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
731   tmp  = a->solve_work;
732 
733   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
734   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
735 
736   /* forward solve the lower triangular */
737   tmp[0] = b[*r++];
738   for (i=1; i<n; i++) {
739     v   = aa + ai[i] ;
740     vi  = aj + ai[i] ;
741     nz  = a->diag[i] - ai[i];
742     sum = b[*r++];
743     while (nz--) sum -= *v++ * tmp[*vi++ ];
744     tmp[i] = sum;
745   }
746 
747   /* backward solve the upper triangular */
748   for (i=n-1; i>=0; i--){
749     v   = aa + a->diag[i] + 1;
750     vi  = aj + a->diag[i] + 1;
751     nz  = ai[i+1] - a->diag[i] - 1;
752     sum = tmp[i];
753     while (nz--) sum -= *v++ * tmp[*vi++ ];
754     tmp[i] = sum*aa[a->diag[i]];
755     x[*c--] += tmp[i];
756   }
757 
758   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
759   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
760   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
761   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
762   PetscLogFlops(2*a->nz);
763 
764   PetscFunctionReturn(0);
765 }
766 /* -------------------------------------------------------------------*/
767 #undef __FUNCT__
768 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
769 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
770 {
771   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
772   IS             iscol = a->col,isrow = a->row;
773   PetscErrorCode ierr;
774   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
775   PetscInt       nz,*rout,*cout,*diag = a->diag;
776   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
777 
778   PetscFunctionBegin;
779   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
780   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
781   tmp  = a->solve_work;
782 
783   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
784   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
785 
786   /* copy the b into temp work space according to permutation */
787   for (i=0; i<n; i++) tmp[i] = b[c[i]];
788 
789   /* forward solve the U^T */
790   for (i=0; i<n; i++) {
791     v   = aa + diag[i] ;
792     vi  = aj + diag[i] + 1;
793     nz  = ai[i+1] - diag[i] - 1;
794     s1  = tmp[i];
795     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
796     while (nz--) {
797       tmp[*vi++ ] -= (*v++)*s1;
798     }
799     tmp[i] = s1;
800   }
801 
802   /* backward solve the L^T */
803   for (i=n-1; i>=0; i--){
804     v   = aa + diag[i] - 1 ;
805     vi  = aj + diag[i] - 1 ;
806     nz  = diag[i] - ai[i];
807     s1  = tmp[i];
808     while (nz--) {
809       tmp[*vi-- ] -= (*v--)*s1;
810     }
811   }
812 
813   /* copy tmp into x according to permutation */
814   for (i=0; i<n; i++) x[r[i]] = tmp[i];
815 
816   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
817   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
818   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
819   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
820 
821   PetscLogFlops(2*a->nz-A->n);
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
827 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
828 {
829   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
830   IS             iscol = a->col,isrow = a->row;
831   PetscErrorCode ierr;
832   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
833   PetscInt       nz,*rout,*cout,*diag = a->diag;
834   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
835 
836   PetscFunctionBegin;
837   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
838 
839   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
840   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
841   tmp = a->solve_work;
842 
843   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
844   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
845 
846   /* copy the b into temp work space according to permutation */
847   for (i=0; i<n; i++) tmp[i] = b[c[i]];
848 
849   /* forward solve the U^T */
850   for (i=0; i<n; i++) {
851     v   = aa + diag[i] ;
852     vi  = aj + diag[i] + 1;
853     nz  = ai[i+1] - diag[i] - 1;
854     tmp[i] *= *v++;
855     while (nz--) {
856       tmp[*vi++ ] -= (*v++)*tmp[i];
857     }
858   }
859 
860   /* backward solve the L^T */
861   for (i=n-1; i>=0; i--){
862     v   = aa + diag[i] - 1 ;
863     vi  = aj + diag[i] - 1 ;
864     nz  = diag[i] - ai[i];
865     while (nz--) {
866       tmp[*vi-- ] -= (*v--)*tmp[i];
867     }
868   }
869 
870   /* copy tmp into x according to permutation */
871   for (i=0; i<n; i++) x[r[i]] += tmp[i];
872 
873   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
874   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
875   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
876   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
877 
878   PetscLogFlops(2*a->nz);
879   PetscFunctionReturn(0);
880 }
881 /* ----------------------------------------------------------------*/
882 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
883 
884 #undef __FUNCT__
885 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
886 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
887 {
888   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
889   IS             isicol;
890   PetscErrorCode ierr;
891   PetscInt       *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
892   PetscInt       *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
893   PetscInt       *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
894   PetscInt       incrlev,nnz,i,levels,diagonal_fill;
895   PetscTruth     col_identity,row_identity;
896   PetscReal      f;
897 
898   PetscFunctionBegin;
899   f             = info->fill;
900   levels        = (PetscInt)info->levels;
901   diagonal_fill = (PetscInt)info->diagonal_fill;
902   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
903 
904   /* special case that simply copies fill pattern */
905   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
906   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
907   if (!levels && row_identity && col_identity) {
908     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
909     (*fact)->factor = FACTOR_LU;
910     b               = (Mat_SeqAIJ*)(*fact)->data;
911     if (!b->diag) {
912       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
913     }
914     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
915     b->row              = isrow;
916     b->col              = iscol;
917     b->icol             = isicol;
918     b->lu_damping       = info->damping;
919     b->lu_zeropivot     = info->zeropivot;
920     b->lu_shift         = info->shift;
921     b->lu_shift_fraction= info->shift_fraction;
922     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
923     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
924     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
925     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
926     PetscFunctionReturn(0);
927   }
928 
929   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
930   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
931 
932   /* get new row pointers */
933   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
934   ainew[0] = 0;
935   /* don't know how many column pointers are needed so estimate */
936   jmax = (PetscInt)(f*(ai[n]+1));
937   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
938   /* ajfill is level of fill for each fill entry */
939   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr);
940   /* fill is a linked list of nonzeros in active row */
941   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
942   /* im is level for each filled value */
943   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
944   /* dloc is location of diagonal in factor */
945   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr);
946   dloc[0]  = 0;
947   for (prow=0; prow<n; prow++) {
948 
949     /* copy current row into linked list */
950     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
951     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
952     xi      = aj + ai[r[prow]];
953     fill[n] = n;
954     fill[prow] = -1; /* marker to indicate if diagonal exists */
955     while (nz--) {
956       fm  = n;
957       idx = ic[*xi++ ];
958       do {
959         m  = fm;
960         fm = fill[m];
961       } while (fm < idx);
962       fill[m]   = idx;
963       fill[idx] = fm;
964       im[idx]   = 0;
965     }
966 
967     /* make sure diagonal entry is included */
968     if (diagonal_fill && fill[prow] == -1) {
969       fm = n;
970       while (fill[fm] < prow) fm = fill[fm];
971       fill[prow] = fill[fm]; /* insert diagonal into linked list */
972       fill[fm]   = prow;
973       im[prow]   = 0;
974       nzf++;
975       dcount++;
976     }
977 
978     nzi = 0;
979     row = fill[n];
980     while (row < prow) {
981       incrlev = im[row] + 1;
982       nz      = dloc[row];
983       xi      = ajnew  + ainew[row]  + nz + 1;
984       flev    = ajfill + ainew[row]  + nz + 1;
985       nnz     = ainew[row+1] - ainew[row] - nz - 1;
986       fm      = row;
987       while (nnz-- > 0) {
988         idx = *xi++ ;
989         if (*flev + incrlev > levels) {
990           flev++;
991           continue;
992         }
993         do {
994           m  = fm;
995           fm = fill[m];
996         } while (fm < idx);
997         if (fm != idx) {
998           im[idx]   = *flev + incrlev;
999           fill[m]   = idx;
1000           fill[idx] = fm;
1001           fm        = idx;
1002           nzf++;
1003         } else {
1004           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
1005         }
1006         flev++;
1007       }
1008       row = fill[row];
1009       nzi++;
1010     }
1011     /* copy new filled row into permanent storage */
1012     ainew[prow+1] = ainew[prow] + nzf;
1013     if (ainew[prow+1] > jmax) {
1014 
1015       /* estimate how much additional space we will need */
1016       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1017       /* just double the memory each time */
1018       /*  maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1019       PetscInt maxadd = jmax;
1020       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1021       jmax += maxadd;
1022 
1023       /* allocate a longer ajnew and ajfill */
1024       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1025       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1026       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1027       ajnew  = xi;
1028       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1029       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1030       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1031       ajfill = xi;
1032       reallocs++; /* count how many times we realloc */
1033     }
1034     xi          = ajnew + ainew[prow] ;
1035     flev        = ajfill + ainew[prow] ;
1036     dloc[prow]  = nzi;
1037     fm          = fill[n];
1038     while (nzf--) {
1039       *xi++   = fm ;
1040       *flev++ = im[fm];
1041       fm      = fill[fm];
1042     }
1043     /* make sure row has diagonal entry */
1044     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1045       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1046     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1047     }
1048   }
1049   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1050   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1051   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1052   ierr = PetscFree(fill);CHKERRQ(ierr);
1053   ierr = PetscFree(im);CHKERRQ(ierr);
1054 
1055   {
1056     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1057     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1058     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1059     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1060     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1061     if (diagonal_fill) {
1062       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1063     }
1064   }
1065 
1066   /* put together the new matrix */
1067   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1068   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1069   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1070   PetscLogObjectParent(*fact,isicol);
1071   b = (Mat_SeqAIJ*)(*fact)->data;
1072   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1073   b->singlemalloc = PETSC_FALSE;
1074   len = (ainew[n] )*sizeof(PetscScalar);
1075   /* the next line frees the default space generated by the Create() */
1076   ierr = PetscFree(b->a);CHKERRQ(ierr);
1077   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1078   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1079   b->j          = ajnew;
1080   b->i          = ainew;
1081   for (i=0; i<n; i++) dloc[i] += ainew[i];
1082   b->diag       = dloc;
1083   b->ilen       = 0;
1084   b->imax       = 0;
1085   b->row        = isrow;
1086   b->col        = iscol;
1087   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1088   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1089   b->icol       = isicol;
1090   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1091   /* In b structure:  Free imax, ilen, old a, old j.
1092      Allocate dloc, solve_work, new a, new j */
1093   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1094   b->maxnz             = b->nz = ainew[n] ;
1095   b->lu_damping        = info->damping;
1096   b->lu_shift          = info->shift;
1097   b->lu_shift_fraction = info->shift_fraction;
1098   b->lu_zeropivot = info->zeropivot;
1099   (*fact)->factor = FACTOR_LU;
1100   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1101   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1102 
1103   (*fact)->info.factor_mallocs    = reallocs;
1104   (*fact)->info.fill_ratio_given  = f;
1105   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #include "src/mat/impls/sbaij/seq/sbaij.h"
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1112 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *B)
1113 {
1114   Mat            C = *B;
1115   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1116   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1117   IS             ip=b->row;
1118   PetscErrorCode ierr;
1119   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1120   PetscInt       *ai=a->i,*aj=a->j;
1121   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1122   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1123   PetscReal      damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount;
1124   PetscTruth     damp,chshift;
1125   PetscInt       nshift=0,ndamp=0;
1126 
1127   PetscFunctionBegin;
1128   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1129 
1130   /* initialization */
1131   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1132   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1133   jl   = il + mbs;
1134   rtmp = (MatScalar*)(jl + mbs);
1135 
1136   shift_amount = 0;
1137   do {
1138     damp = PETSC_FALSE;
1139     chshift = PETSC_FALSE;
1140 
1141     for (i=0; i<mbs; i++) {
1142       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1143     }
1144 
1145     for (k = 0; k<mbs; k++){
1146       bval = ba + bi[k];
1147       /* initialize k-th row by the perm[k]-th row of A */
1148       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1149       for (j = jmin; j < jmax; j++){
1150         col = rip[aj[j]];
1151         if (col >= k){ /* only take upper triangular entry */
1152           rtmp[col] = aa[j];
1153           *bval++  = 0.0; /* for in-place factorization */
1154         }
1155       }
1156       /* damp the diagonal of the matrix */
1157       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
1158 
1159       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1160       dk = rtmp[k];
1161       i = jl[k]; /* first row to be added to k_th row  */
1162 
1163       while (i < k){
1164         nexti = jl[i]; /* next row to be added to k_th row */
1165 
1166         /* compute multiplier, update diag(k) and U(i,k) */
1167         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1168         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1169         dk += uikdi*ba[ili];
1170         ba[ili] = uikdi; /* -U(i,k) */
1171 
1172         /* add multiple of row i to k-th row */
1173         jmin = ili + 1; jmax = bi[i+1];
1174         if (jmin < jmax){
1175           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1176           /* update il and jl for row i */
1177           il[i] = jmin;
1178           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1179         }
1180         i = nexti;
1181       }
1182 
1183       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
1184 	/* calculate a shift that would make this row diagonally dominant */
1185 	PetscReal rs = PetscAbs(PetscRealPart(dk));
1186 	jmin      = bi[k]+1;
1187 	nz        = bi[k+1] - jmin;
1188 	if (nz){
1189 	  bcol = bj + jmin;
1190 	  bval = ba + jmin;
1191 	  while (nz--){
1192 	    rs += PetscAbsScalar(rtmp[*bcol++]);
1193 	  }
1194 	}
1195 	/* if this shift is less than the previous, just up the previous
1196 	   one by a bit */
1197 	shift_amount = PetscMax(rs,1.1*shift_amount);
1198 	chshift  = PETSC_TRUE;
1199 	/* Unlike in the ILU case there is no exit condition on nshift:
1200 	   we increase the shift until it converges. There is no guarantee that
1201 	   this algorithm converges faster or slower, or is better or worse
1202 	   than the ILU algorithm. */
1203 	nshift++;
1204 	break;
1205       }
1206       if (PetscRealPart(dk) < zeropivot){
1207         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
1208         if (damping > 0.0) {
1209           if (ndamp) damping *= 2.0;
1210           damp = PETSC_TRUE;
1211           ndamp++;
1212           break;
1213         } else if (PetscAbsScalar(dk) < zeropivot){
1214           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
1215         } else {
1216           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
1217         }
1218       }
1219 
1220       /* copy data into U(k,:) */
1221       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1222       jmin = bi[k]+1; jmax = bi[k+1];
1223       if (jmin < jmax) {
1224         for (j=jmin; j<jmax; j++){
1225           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1226         }
1227         /* add the k-th row into il and jl */
1228         il[k] = jmin;
1229         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1230       }
1231     }
1232   } while (damp||chshift);
1233   ierr = PetscFree(il);CHKERRQ(ierr);
1234 
1235   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1236   C->factor       = FACTOR_CHOLESKY;
1237   C->assembled    = PETSC_TRUE;
1238   C->preallocated = PETSC_TRUE;
1239   PetscLogFlops(C->m);
1240   if (ndamp) {
1241     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping);
1242   }
1243   if (nshift) {
1244     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ diagonal shifted %D shifts\n",nshift);
1245   }
1246   PetscFunctionReturn(0);
1247 }
1248 #undef __FUNCT__
1249 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering"
1250 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering(Mat A,Mat *fact)
1251 {
1252   Mat            C=*fact;
1253   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1254   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1255   PetscErrorCode ierr;
1256   PetscInt       i,j,am=A->m;
1257   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1258   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
1259   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1260   PetscReal      zeropivot=b->factor_zeropivot,shift_amount,rs;
1261   PetscTruth     chshift;
1262   PetscInt       nshift=0;
1263   PetscReal      shift_pd=b->factor_shift,shift_nonzero=b->factor_damping;
1264 
1265   PetscFunctionBegin;
1266   /* initialization */
1267   nz   = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar);
1268   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1269   jl   = il + am;
1270   rtmp = (MatScalar*)(jl + am);
1271 
1272   shift_amount = 0;
1273   do {
1274     chshift = PETSC_FALSE;
1275     for (i=0; i<am; i++) {
1276       rtmp[i] = 0.0; jl[i] = am; il[0] = 0;
1277     }
1278 
1279     for (k = 0; k<am; k++){
1280     /* initialize k-th row with elements nonzero in row perm(k) of A */
1281       nz   = ai[k+1] - ai[k];
1282       acol = aj + ai[k];
1283       aval = aa + ai[k];
1284       bval = ba + bi[k];
1285       while (nz -- ){
1286         if (*acol < k) { /* skip lower triangular entries */
1287           acol++; aval++;
1288         } else {
1289           rtmp[*acol++] = *aval++;
1290           *bval++       = 0.0; /* for in-place factorization */
1291         }
1292       }
1293       /* shift the diagonal of the matrix */
1294       if (nshift) rtmp[k] += shift_amount;
1295 
1296       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1297       dk = rtmp[k];
1298       i  = jl[k]; /* first row to be added to k_th row  */
1299 
1300       while (i < k){
1301         nexti = jl[i]; /* next row to be added to k_th row */
1302         /* compute multiplier, update D(k) and U(i,k) */
1303         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1304         uikdi = - ba[ili]*ba[bi[i]];
1305         dk   += uikdi*ba[ili];
1306         ba[ili] = uikdi; /* -U(i,k) */
1307 
1308         /* add multiple of row i to k-th row ... */
1309         jmin = ili + 1;
1310         nz   = bi[i+1] - jmin;
1311         if (nz > 0){
1312           bcol = bj + jmin;
1313           bval = ba + jmin;
1314           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1315           /* update il and jl for i-th row */
1316           il[i] = jmin;
1317           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1318         }
1319         i = nexti;
1320       }
1321 
1322       /* shift the diagonals when zero pivot is detected */
1323       /* compute rs=sum of abs(off-diagonal) */
1324       rs   = 0.0;
1325       jmin = bi[k]+1;
1326       nz   = bi[k+1] - jmin;
1327       if (nz){
1328         bcol = bj + jmin;
1329         while (nz--){
1330           rs += PetscAbsScalar(rtmp[*bcol++]);
1331         }
1332       }
1333       if (PetscAbsScalar(dk) <= zeropivot*rs && shift_nonzero){
1334         /* force |diag(*B)| > zeropivot*rs */
1335         if (!nshift){
1336           shift_amount = shift_nonzero;
1337         } else {
1338           shift_amount *= 2.0;
1339         }
1340         chshift = PETSC_TRUE;
1341         nshift++;
1342         break;
1343       } else if (PetscRealPart(dk) <= zeropivot*rs && shift_pd){
1344         /* calculate a shift that would make this row diagonally dominant */
1345 	shift_amount = PetscMax(rs+PetscAbs(PetscRealPart(dk)),1.1*shift_amount);
1346 	chshift      = PETSC_TRUE;
1347 	/* Unlike in the ILU case there is no exit condition on nshift:
1348 	   we increase the shift until it converges. There is no guarantee that
1349 	   this algorithm converges faster or slower, or is better or worse
1350 	   than the ILU algorithm. */
1351 	nshift++;
1352 	break;
1353       } else if (PetscAbsScalar(dk) <= zeropivot*rs){
1354         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1355       }
1356 
1357       /* copy data into U(k,:) */
1358       ba[bi[k]] = 1.0/dk;
1359       jmin      = bi[k]+1;
1360       nz        = bi[k+1] - jmin;
1361       if (nz){
1362         bcol = bj + jmin;
1363         bval = ba + jmin;
1364         while (nz--){
1365           *bval++       = rtmp[*bcol];
1366           rtmp[*bcol++] = 0.0;
1367         }
1368         /* add k-th row into il and jl */
1369         il[k] = jmin;
1370         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1371       }
1372     }
1373   } while (chshift);
1374   ierr = PetscFree(il);CHKERRQ(ierr);
1375 
1376   C->factor       = FACTOR_CHOLESKY;
1377   C->assembled    = PETSC_TRUE;
1378   C->preallocated = PETSC_TRUE;
1379   PetscLogFlops(C->m);
1380   if (nshift){
1381     if (shift_nonzero) {
1382       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount);
1383     } else if (shift_pd) {
1384       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering: number of shift_pd tries %D, shift_amount %g\n",nshift,shift_amount);
1385     }
1386   }
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 #include "petscbt.h"
1391 #include "src/mat/utils/freespace.h"
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1394 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1395 {
1396   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1397   Mat_SeqSBAIJ   *b;
1398   Mat            B;
1399   PetscErrorCode ierr;
1400   PetscTruth     perm_identity;
1401   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1402   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1403   PetscInt       nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1404   PetscReal      fill=info->fill,levels=info->levels;
1405   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1406   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1407   PetscBT        lnkbt;
1408 
1409   PetscFunctionBegin;
1410   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1411   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1412 
1413   /* special case that simply copies fill pattern */
1414   if (!levels && perm_identity) {
1415     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1416     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1417     for (i=0; i<am; i++) {
1418       ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */
1419     }
1420     ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1421     B = *fact;
1422     ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1423     ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr);
1424 
1425     b  = (Mat_SeqSBAIJ*)B->data;
1426     uj = b->j;
1427     for (i=0; i<am; i++) {
1428       aj = a->j + a->diag[i];
1429       for (j=0; j<ui[i]; j++){
1430         *uj++ = *aj++;
1431       }
1432       b->ilen[i] = ui[i];
1433     }
1434     ierr = PetscFree(ui);CHKERRQ(ierr);
1435     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1436     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1437 
1438     b->factor_zeropivot      = info->zeropivot;
1439     b->factor_damping        = info->damping;
1440     b->factor_shift          = info->shift;
1441     B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1442     B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1443     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
1444     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering;
1445     PetscFunctionReturn(0);
1446   }
1447 
1448   /* initialization */
1449   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1450   ui[0] = 0;
1451   ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
1452 
1453   /* jl: linked list for storing indices of the pivot rows
1454      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1455   ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1456   il         = jl + am;
1457   uj_ptr     = (PetscInt**)(il + am);
1458   uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1459   for (i=0; i<am; i++){
1460     jl[i] = am; il[i] = 0;
1461   }
1462 
1463   /* create and initialize a linked list for storing column indices of the active row k */
1464   nlnk = am + 1;
1465   ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1466 
1467   /* initial FreeSpace size is fill*(ai[am]+1) */
1468   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1469   current_space = free_space;
1470   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1471   current_space_lvl = free_space_lvl;
1472 
1473   for (k=0; k<am; k++){  /* for each active row k */
1474     /* initialize lnk by the column indices of row rip[k] of A */
1475     nzk   = 0;
1476     ncols = ai[rip[k]+1] - ai[rip[k]];
1477     ncols_upper = 0;
1478     cols        = cols_lvl + am;
1479     for (j=0; j<ncols; j++){
1480       i = rip[*(aj + ai[rip[k]] + j)];
1481       if (i >= k){ /* only take upper triangular entry */
1482         cols[ncols_upper] = i;
1483         cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
1484         ncols_upper++;
1485       }
1486     }
1487     ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1488     nzk += nlnk;
1489 
1490     /* update lnk by computing fill-in for each pivot row to be merged in */
1491     prow = jl[k]; /* 1st pivot row */
1492 
1493     while (prow < k){
1494       nextprow = jl[prow];
1495 
1496       /* merge prow into k-th row */
1497       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1498       jmax = ui[prow+1];
1499       ncols = jmax-jmin;
1500       i     = jmin - ui[prow];
1501       cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1502       for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1503       ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1504       nzk += nlnk;
1505 
1506       /* update il and jl for prow */
1507       if (jmin < jmax){
1508         il[prow] = jmin;
1509         j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1510       }
1511       prow = nextprow;
1512     }
1513 
1514     /* if free space is not available, make more free space */
1515     if (current_space->local_remaining<nzk) {
1516       i = am - k + 1; /* num of unfactored rows */
1517       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1518       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1519       ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1520       reallocs++;
1521     }
1522 
1523     /* copy data into free_space and free_space_lvl, then initialize lnk */
1524     ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1525 
1526     /* add the k-th row into il and jl */
1527     if (nzk-1 > 0){
1528       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1529       jl[k] = jl[i]; jl[i] = k;
1530       il[k] = ui[k] + 1;
1531     }
1532     uj_ptr[k]     = current_space->array;
1533     uj_lvl_ptr[k] = current_space_lvl->array;
1534 
1535     current_space->array           += nzk;
1536     current_space->local_used      += nzk;
1537     current_space->local_remaining -= nzk;
1538 
1539     current_space_lvl->array           += nzk;
1540     current_space_lvl->local_used      += nzk;
1541     current_space_lvl->local_remaining -= nzk;
1542 
1543     ui[k+1] = ui[k] + nzk;
1544   }
1545 
1546   if (ai[am] != 0) {
1547     PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]);
1548     PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1549     PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1550     PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1551   } else {
1552      PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1553   }
1554 
1555   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1556   ierr = PetscFree(jl);CHKERRQ(ierr);
1557   ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
1558 
1559   /* destroy list of free space and other temporary array(s) */
1560   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1561   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1562   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1563   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1564 
1565   /* put together the new matrix in MATSEQSBAIJ format */
1566   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1567   B = *fact;
1568   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1569   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1570 
1571   b = (Mat_SeqSBAIJ*)B->data;
1572   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1573   b->singlemalloc = PETSC_FALSE;
1574   /* the next line frees the default space generated by the Create() */
1575   ierr = PetscFree(b->a);CHKERRQ(ierr);
1576   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1577   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1578   b->j    = uj;
1579   b->i    = ui;
1580   b->diag = 0;
1581   b->ilen = 0;
1582   b->imax = 0;
1583   b->row  = perm;
1584   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1585   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1586   b->icol = perm;
1587   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1588   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1589   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1590   b->maxnz = b->nz = ui[am];
1591 
1592   B->factor                 = FACTOR_CHOLESKY;
1593   B->info.factor_mallocs    = reallocs;
1594   B->info.fill_ratio_given  = fill;
1595   if (ai[am] != 0) {
1596     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1597   } else {
1598     B->info.fill_ratio_needed = 0.0;
1599   }
1600 
1601   b->factor_zeropivot      = info->zeropivot;
1602   b->factor_damping        = info->damping;
1603   b->factor_shift          = info->shift;
1604   if (perm_identity){
1605     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1606     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1607     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
1608     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering;
1609   } else {
1610     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1611   }
1612   PetscFunctionReturn(0);
1613 }
1614 
1615 #undef __FUNCT__
1616 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1617 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1618 {
1619   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1620   Mat_SeqSBAIJ   *b;
1621   Mat            B;
1622   PetscErrorCode ierr;
1623   PetscTruth     perm_identity;
1624   PetscReal      fill = info->fill;
1625   PetscInt       *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1626   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1627   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1628   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1629   PetscBT        lnkbt;
1630   IS             iperm;
1631 
1632   PetscFunctionBegin;
1633   /* check whether perm is the identity mapping */
1634   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1635   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1636 
1637   if (!perm_identity){
1638     /* check if perm is symmetric! */
1639     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1640     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1641     for (i=0; i<mbs; i++) {
1642       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1643     }
1644     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1645     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1646   }
1647 
1648   /* initialization */
1649   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1650   ui[0] = 0;
1651 
1652   /* jl: linked list for storing indices of the pivot rows
1653      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
1654   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1655   il     = jl + mbs;
1656   cols   = il + mbs;
1657   ui_ptr = (PetscInt**)(cols + mbs);
1658   for (i=0; i<mbs; i++){
1659     jl[i] = mbs; il[i] = 0;
1660   }
1661 
1662   /* create and initialize a linked list for storing column indices of the active row k */
1663   nlnk = mbs + 1;
1664   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1665 
1666   /* initial FreeSpace size is fill*(ai[mbs]+1) */
1667   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
1668   current_space = free_space;
1669 
1670   for (k=0; k<mbs; k++){  /* for each active row k */
1671     /* initialize lnk by the column indices of row rip[k] of A */
1672     nzk   = 0;
1673     ncols = ai[rip[k]+1] - ai[rip[k]];
1674     ncols_upper = 0;
1675     for (j=0; j<ncols; j++){
1676       i = rip[*(aj + ai[rip[k]] + j)];
1677       if (i >= k){ /* only take upper triangular entry */
1678         cols[ncols_upper] = i;
1679         ncols_upper++;
1680       }
1681     }
1682     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1683     nzk += nlnk;
1684 
1685     /* update lnk by computing fill-in for each pivot row to be merged in */
1686     prow = jl[k]; /* 1st pivot row */
1687 
1688     while (prow < k){
1689       nextprow = jl[prow];
1690       /* merge prow into k-th row */
1691       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
1692       jmax = ui[prow+1];
1693       ncols = jmax-jmin;
1694       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
1695       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1696       nzk += nlnk;
1697 
1698       /* update il and jl for prow */
1699       if (jmin < jmax){
1700         il[prow] = jmin;
1701         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1702       }
1703       prow = nextprow;
1704     }
1705 
1706     /* if free space is not available, make more free space */
1707     if (current_space->local_remaining<nzk) {
1708       i = mbs - k + 1; /* num of unfactored rows */
1709       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1710       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1711       reallocs++;
1712     }
1713 
1714     /* copy data into free space, then initialize lnk */
1715     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1716 
1717     /* add the k-th row into il and jl */
1718     if (nzk-1 > 0){
1719       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1720       jl[k] = jl[i]; jl[i] = k;
1721       il[k] = ui[k] + 1;
1722     }
1723     ui_ptr[k] = current_space->array;
1724     current_space->array           += nzk;
1725     current_space->local_used      += nzk;
1726     current_space->local_remaining -= nzk;
1727 
1728     ui[k+1] = ui[k] + nzk;
1729   }
1730 
1731   if (ai[mbs] != 0) {
1732     PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]);
1733     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1734     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1735     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1736   } else {
1737      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1738   }
1739 
1740   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1741   ierr = PetscFree(jl);CHKERRQ(ierr);
1742 
1743   /* destroy list of free space and other temporary array(s) */
1744   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1745   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1746   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1747 
1748   /* put together the new matrix in MATSEQSBAIJ format */
1749   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
1750   B    = *fact;
1751   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1752   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1753 
1754   b = (Mat_SeqSBAIJ*)B->data;
1755   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1756   b->singlemalloc = PETSC_FALSE;
1757   /* the next line frees the default space generated by the Create() */
1758   ierr = PetscFree(b->a);CHKERRQ(ierr);
1759   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1760   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1761   b->j    = uj;
1762   b->i    = ui;
1763   b->diag = 0;
1764   b->ilen = 0;
1765   b->imax = 0;
1766   b->row  = perm;
1767   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1768   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1769   b->icol = perm;
1770   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1771   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1772   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
1773   b->maxnz = b->nz = ui[mbs];
1774 
1775   B->factor                 = FACTOR_CHOLESKY;
1776   B->info.factor_mallocs    = reallocs;
1777   B->info.fill_ratio_given  = fill;
1778   if (ai[mbs] != 0) {
1779     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
1780   } else {
1781     B->info.fill_ratio_needed = 0.0;
1782   }
1783   if (perm_identity){
1784     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1785     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1786     ierr = PetscObjectComposeFunction((PetscObject)(*fact),"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr);
1787     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering;
1788   } else {
1789     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1790   }
1791   PetscFunctionReturn(0);
1792 }
1793