xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 04e58c48bdbac346df2d413bb802c16e4766d6a4)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
23 
24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
28 #endif
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
32   /* ------------------------------------------------------------
33 
34           This interface was contribed by Tony Caola
35 
36      This routine is an interface to the pivoting drop-tolerance
37      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
38      SPARSEKIT2.
39 
40      The SPARSEKIT2 routines used here are covered by the GNU
41      copyright; see the file gnu in this directory.
42 
43      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
44      help in getting this routine ironed out.
45 
46      The major drawback to this routine is that if info->fill is
47      not large enough it fails rather than allocating more space;
48      this can be fixed by hacking/improving the f2c version of
49      Yousef Saad's code.
50 
51      ------------------------------------------------------------
52 */
53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
54 {
55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
56   PetscFunctionBegin;
57   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
58   You can obtain the drop tolerance routines by installing PETSc from\n\
59   www.mcs.anl.gov/petsc\n");
60 #else
61   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
62   IS             iscolf,isicol,isirow;
63   PetscTruth     reorder;
64   PetscErrorCode ierr,sierr;
65   PetscInt       *c,*r,*ic,i,n = A->m;
66   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
67   PetscInt       *ordcol,*iwk,*iperm,*jw;
68   PetscInt       jmax,lfill,job,*o_i,*o_j;
69   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
70   PetscReal      af;
71 
72   PetscFunctionBegin;
73 
74   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
75   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
76   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
77   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
78   lfill   = (PetscInt)(info->dtcount/2.0);
79   jmax    = (PetscInt)(info->fill*a->nz);
80 
81 
82   /* ------------------------------------------------------------
83      If reorder=.TRUE., then the original matrix has to be
84      reordered to reflect the user selected ordering scheme, and
85      then de-reordered so it is in it's original format.
86      Because Saad's dperm() is NOT in place, we have to copy
87      the original matrix and allocate more storage. . .
88      ------------------------------------------------------------
89   */
90 
91   /* set reorder to true if either isrow or iscol is not identity */
92   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
93   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
94   reorder = PetscNot(reorder);
95 
96 
97   /* storage for ilu factor */
98   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
100   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
101   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
102 
103   /* ------------------------------------------------------------
104      Make sure that everything is Fortran formatted (1-Based)
105      ------------------------------------------------------------
106   */
107   for (i=old_i[0];i<old_i[n];i++) {
108     old_j[i]++;
109   }
110   for(i=0;i<n+1;i++) {
111     old_i[i]++;
112   };
113 
114 
115   if (reorder) {
116     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
117     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       r[i]  = r[i]+1;
120       c[i]  = c[i]+1;
121     }
122     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
123     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
124     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
125     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
126     for (i=0;i<n;i++) {
127       r[i]  = r[i]-1;
128       c[i]  = c[i]-1;
129     }
130     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
131     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
132     o_a = old_a2;
133     o_j = old_j2;
134     o_i = old_i2;
135   } else {
136     o_a = old_a;
137     o_j = old_j;
138     o_i = old_i;
139   }
140 
141   /* ------------------------------------------------------------
142      Call Saad's ilutp() routine to generate the factorization
143      ------------------------------------------------------------
144   */
145 
146   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
147   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
148   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
149 
150   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
151   if (sierr) {
152     switch (sierr) {
153       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
154       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
155       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
156       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
157       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
158       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
159     }
160   }
161 
162   ierr = PetscFree(w);CHKERRQ(ierr);
163   ierr = PetscFree(jw);CHKERRQ(ierr);
164 
165   /* ------------------------------------------------------------
166      Saad's routine gives the result in Modified Sparse Row (msr)
167      Convert to Compressed Sparse Row format (csr)
168      ------------------------------------------------------------
169   */
170 
171   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
172   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
173 
174   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
175 
176   ierr = PetscFree(iwk);CHKERRQ(ierr);
177   ierr = PetscFree(wk);CHKERRQ(ierr);
178 
179   if (reorder) {
180     ierr = PetscFree(old_a2);CHKERRQ(ierr);
181     ierr = PetscFree(old_j2);CHKERRQ(ierr);
182     ierr = PetscFree(old_i2);CHKERRQ(ierr);
183   } else {
184     /* fix permutation of old_j that the factorization introduced */
185     for (i=old_i[0]; i<old_i[n]; i++) {
186       old_j[i-1] = iperm[old_j[i-1]-1];
187     }
188   }
189 
190   /* get rid of the shift to indices starting at 1 */
191   for (i=0; i<n+1; i++) {
192     old_i[i]--;
193   }
194   for (i=old_i[0];i<old_i[n];i++) {
195     old_j[i]--;
196   }
197 
198   /* Make the factored matrix 0-based */
199   for (i=0; i<n+1; i++) {
200     new_i[i]--;
201   }
202   for (i=new_i[0];i<new_i[n];i++) {
203     new_j[i]--;
204   }
205 
206   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
207   /*-- permute the right-hand-side and solution vectors           --*/
208   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
209   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
210   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
211   for(i=0; i<n; i++) {
212     ordcol[i] = ic[iperm[i]-1];
213   };
214   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
215   ierr = ISDestroy(isicol);CHKERRQ(ierr);
216 
217   ierr = PetscFree(iperm);CHKERRQ(ierr);
218 
219   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
220   ierr = PetscFree(ordcol);CHKERRQ(ierr);
221 
222   /*----- put together the new matrix -----*/
223 
224   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
225   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
226   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
227   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
228   (*fact)->factor    = FACTOR_LU;
229   (*fact)->assembled = PETSC_TRUE;
230 
231   b = (Mat_SeqAIJ*)(*fact)->data;
232   b->freedata      = PETSC_TRUE;
233   b->sorted        = PETSC_FALSE;
234   b->singlemalloc  = PETSC_FALSE;
235   b->a             = new_a;
236   b->j             = new_j;
237   b->i             = new_i;
238   b->ilen          = 0;
239   b->imax          = 0;
240   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241   b->row           = isirow;
242   b->col           = iscolf;
243   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
244   b->maxnz = b->nz = new_i[n];
245   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
246   (*fact)->info.factor_mallocs = 0;
247 
248   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
249 
250   af = ((double)b->nz)/((double)a->nz) + .001;
251   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr);
252   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr);
253   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr);
254   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
255 
256   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
257 
258   PetscFunctionReturn(0);
259 #endif
260 }
261 
262 #undef __FUNCT__
263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
265 {
266   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
267   IS                 isicol;
268   PetscErrorCode     ierr;
269   PetscInt           *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
270   PetscInt           *bi,*bj,*ajtmp;
271   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
272   PetscReal          f;
273   PetscInt           nlnk,*lnk,k,*cols,**bi_ptr;
274   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
275   PetscBT            lnkbt;
276 
277   PetscFunctionBegin;
278   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
279   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
280   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
281   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
282 
283   /* get new row pointers */
284   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
285   bi[0] = 0;
286 
287   /* bdiag is location of diagonal in factor */
288   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
289   bdiag[0] = 0;
290 
291   /* linked list for storing column indices of the active row */
292   nlnk = n + 1;
293   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
294 
295   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr);
296   im     = cols + n;
297   bi_ptr = (PetscInt**)(im + n);
298 
299   /* initial FreeSpace size is f*(ai[n]+1) */
300   f = info->fill;
301   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
302   current_space = free_space;
303 
304   for (i=0; i<n; i++) {
305     /* copy previous fill into linked list */
306     nzi = 0;
307     nnz = ai[r[i]+1] - ai[r[i]];
308     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
309     ajtmp = aj + ai[r[i]];
310     for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */
311     ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
312     nzi += nlnk;
313 
314     /* add pivot rows into linked list */
315     row = lnk[n];
316     while (row < i) {
317       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
318       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
319       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
320       nzi += nlnk;
321       row  = lnk[row];
322     }
323     bi[i+1] = bi[i] + nzi;
324     im[i]   = nzi;
325 
326     /* mark bdiag */
327     nzbd = 0;
328     nnz  = nzi;
329     k    = lnk[n];
330     while (nnz-- && k < i){
331       nzbd++;
332       k = lnk[k];
333     }
334     bdiag[i] = bi[i] + nzbd;
335 
336     /* if free space is not available, make more free space */
337     if (current_space->local_remaining<nzi) {
338       nnz = (n - i)*nzi; /* estimated and max additional space needed */
339       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
340       reallocs++;
341     }
342 
343     /* copy data into free space, then initialize lnk */
344     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
345     bi_ptr[i] = current_space->array;
346     current_space->array           += nzi;
347     current_space->local_used      += nzi;
348     current_space->local_remaining -= nzi;
349   }
350 #if defined(PETSC_USE_DEBUG)
351   if (ai[n] != 0) {
352     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
353     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr);
354     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));CHKERRQ(ierr);
355     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr);
356     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
357   } else {
358     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr);
359   }
360 #endif
361 
362   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
363   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
364 
365   /* destroy list of free space and other temporary array(s) */
366   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
367   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
368   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
369   ierr = PetscFree(cols);CHKERRQ(ierr);
370 
371   /* put together the new matrix */
372   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
373   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
374   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
375   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
376   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
377   b    = (Mat_SeqAIJ*)(*B)->data;
378   b->freedata     = PETSC_TRUE;
379   b->singlemalloc = PETSC_FALSE;
380   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
381   b->j          = bj;
382   b->i          = bi;
383   b->diag       = bdiag;
384   b->ilen       = 0;
385   b->imax       = 0;
386   b->row        = isrow;
387   b->col        = iscol;
388   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
389   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
390   b->icol       = isicol;
391   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
392 
393   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
394   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
395   b->maxnz = b->nz = bi[n] ;
396 
397   (*B)->factor                 =  FACTOR_LU;
398   (*B)->info.factor_mallocs    = reallocs;
399   (*B)->info.fill_ratio_given  = f;
400 
401   if (ai[n] != 0) {
402     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
403   } else {
404     (*B)->info.fill_ratio_needed = 0.0;
405   }
406   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
407   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
408   PetscFunctionReturn(0);
409 }
410 
411 /* ----------------------------------------------------------- */
412 #undef __FUNCT__
413 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
414 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
415 {
416   Mat            C=*B;
417   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
418   IS             isrow = b->row,isicol = b->icol;
419   PetscErrorCode ierr;
420   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
421   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
422   PetscInt       *diag_offset = b->diag,diag,*pj;
423   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
424   PetscScalar    d;
425   PetscReal      rs;
426   LUShift_Ctx    sctx;
427   PetscInt       newshift;
428 
429   PetscFunctionBegin;
430   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
431   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
432   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
433   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
434   rtmps = rtmp; ics = ic;
435 
436   if (!a->diag) {
437     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
438   }
439   /* if both shift schemes are chosen by user, only use info->shiftpd */
440   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
441   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
442     PetscInt *aai = a->i,*ddiag = a->diag;
443     sctx.shift_top = 0;
444     for (i=0; i<n; i++) {
445       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
446       d  = (a->a)[ddiag[i]];
447       rs = -PetscAbsScalar(d) - PetscRealPart(d);
448       v  = a->a+aai[i];
449       nz = aai[i+1] - aai[i];
450       for (j=0; j<nz; j++)
451 	rs += PetscAbsScalar(v[j]);
452       if (rs>sctx.shift_top) sctx.shift_top = rs;
453     }
454     if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12;
455     sctx.shift_top    *= 1.1;
456     sctx.nshift_max   = 5;
457     sctx.shift_lo     = 0.;
458     sctx.shift_hi     = 1.;
459   }
460 
461   sctx.shift_amount = 0;
462   sctx.nshift       = 0;
463   do {
464     sctx.lushift = PETSC_FALSE;
465     for (i=0; i<n; i++){
466       nz    = bi[i+1] - bi[i];
467       bjtmp = bj + bi[i];
468       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
469 
470       /* load in initial (unfactored row) */
471       nz    = a->i[r[i]+1] - a->i[r[i]];
472       ajtmp = a->j + a->i[r[i]];
473       v     = a->a + a->i[r[i]];
474       for (j=0; j<nz; j++) {
475         rtmp[ics[ajtmp[j]]] = v[j];
476       }
477       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
478 
479       row = *bjtmp++;
480       while  (row < i) {
481         pc = rtmp + row;
482         if (*pc != 0.0) {
483           pv         = b->a + diag_offset[row];
484           pj         = b->j + diag_offset[row] + 1;
485           multiplier = *pc / *pv++;
486           *pc        = multiplier;
487           nz         = bi[row+1] - diag_offset[row] - 1;
488           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
489           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
490         }
491         row = *bjtmp++;
492       }
493       /* finished row so stick it into b->a */
494       pv   = b->a + bi[i] ;
495       pj   = b->j + bi[i] ;
496       nz   = bi[i+1] - bi[i];
497       diag = diag_offset[i] - bi[i];
498       rs   = 0.0;
499       for (j=0; j<nz; j++) {
500         pv[j] = rtmps[pj[j]];
501         if (j != diag) rs += PetscAbsScalar(pv[j]);
502       }
503 
504       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
505       sctx.rs  = rs;
506       sctx.pv  = pv[diag];
507       ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
508       if (newshift == 1){
509         break;    /* sctx.shift_amount is updated */
510       } else if (newshift == -1){
511         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
512       }
513     }
514 
515     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
516       /*
517        * if no shift in this attempt & shifting & started shifting & can refine,
518        * then try lower shift
519        */
520       sctx.shift_hi        = info->shift_fraction;
521       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
522       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
523       sctx.lushift         = PETSC_TRUE;
524       sctx.nshift++;
525     }
526   } while (sctx.lushift);
527 
528   /* invert diagonal entries for simplier triangular solves */
529   for (i=0; i<n; i++) {
530     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
531   }
532 
533   ierr = PetscFree(rtmp);CHKERRQ(ierr);
534   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
535   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
536   C->factor = FACTOR_LU;
537   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
538   C->assembled = PETSC_TRUE;
539   ierr = PetscLogFlops(C->n);CHKERRQ(ierr);
540   if (sctx.nshift){
541     if (info->shiftnz) {
542       ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
543     } else if (info->shiftpd) {
544       ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr);
545     }
546   }
547   PetscFunctionReturn(0);
548 }
549 
550 #undef __FUNCT__
551 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
552 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
553 {
554   PetscFunctionBegin;
555   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
556   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
557   PetscFunctionReturn(0);
558 }
559 
560 
561 /* ----------------------------------------------------------- */
562 #undef __FUNCT__
563 #define __FUNCT__ "MatLUFactor_SeqAIJ"
564 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
565 {
566   PetscErrorCode ierr;
567   Mat            C;
568 
569   PetscFunctionBegin;
570   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
571   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
572   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
573   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
574   PetscFunctionReturn(0);
575 }
576 /* ----------------------------------------------------------- */
577 #undef __FUNCT__
578 #define __FUNCT__ "MatSolve_SeqAIJ"
579 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
580 {
581   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
582   IS             iscol = a->col,isrow = a->row;
583   PetscErrorCode ierr;
584   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
585   PetscInt       nz,*rout,*cout;
586   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
587 
588   PetscFunctionBegin;
589   if (!n) PetscFunctionReturn(0);
590 
591   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
592   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
593   tmp  = a->solve_work;
594 
595   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
596   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
597 
598   /* forward solve the lower triangular */
599   tmp[0] = b[*r++];
600   tmps   = tmp;
601   for (i=1; i<n; i++) {
602     v   = aa + ai[i] ;
603     vi  = aj + ai[i] ;
604     nz  = a->diag[i] - ai[i];
605     sum = b[*r++];
606     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
607     tmp[i] = sum;
608   }
609 
610   /* backward solve the upper triangular */
611   for (i=n-1; i>=0; i--){
612     v   = aa + a->diag[i] + 1;
613     vi  = aj + a->diag[i] + 1;
614     nz  = ai[i+1] - a->diag[i] - 1;
615     sum = tmp[i];
616     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
617     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
618   }
619 
620   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
621   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
622   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
623   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
624   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
625   PetscFunctionReturn(0);
626 }
627 
628 /* ----------------------------------------------------------- */
629 #undef __FUNCT__
630 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
631 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
632 {
633   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
634   PetscErrorCode ierr;
635   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
636   PetscScalar    *x,*b,*aa = a->a;
637 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
638   PetscInt       adiag_i,i,*vi,nz,ai_i;
639   PetscScalar    *v,sum;
640 #endif
641 
642   PetscFunctionBegin;
643   if (!n) PetscFunctionReturn(0);
644 
645   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
646   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
647 
648 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
649   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
650 #else
651   /* forward solve the lower triangular */
652   x[0] = b[0];
653   for (i=1; i<n; i++) {
654     ai_i = ai[i];
655     v    = aa + ai_i;
656     vi   = aj + ai_i;
657     nz   = adiag[i] - ai_i;
658     sum  = b[i];
659     while (nz--) sum -= *v++ * x[*vi++];
660     x[i] = sum;
661   }
662 
663   /* backward solve the upper triangular */
664   for (i=n-1; i>=0; i--){
665     adiag_i = adiag[i];
666     v       = aa + adiag_i + 1;
667     vi      = aj + adiag_i + 1;
668     nz      = ai[i+1] - adiag_i - 1;
669     sum     = x[i];
670     while (nz--) sum -= *v++ * x[*vi++];
671     x[i]    = sum*aa[adiag_i];
672   }
673 #endif
674   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
675   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
676   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
682 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
683 {
684   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
685   IS             iscol = a->col,isrow = a->row;
686   PetscErrorCode ierr;
687   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
688   PetscInt       nz,*rout,*cout;
689   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
690 
691   PetscFunctionBegin;
692   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
693 
694   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
695   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
696   tmp  = a->solve_work;
697 
698   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
699   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
700 
701   /* forward solve the lower triangular */
702   tmp[0] = b[*r++];
703   for (i=1; i<n; i++) {
704     v   = aa + ai[i] ;
705     vi  = aj + ai[i] ;
706     nz  = a->diag[i] - ai[i];
707     sum = b[*r++];
708     while (nz--) sum -= *v++ * tmp[*vi++ ];
709     tmp[i] = sum;
710   }
711 
712   /* backward solve the upper triangular */
713   for (i=n-1; i>=0; i--){
714     v   = aa + a->diag[i] + 1;
715     vi  = aj + a->diag[i] + 1;
716     nz  = ai[i+1] - a->diag[i] - 1;
717     sum = tmp[i];
718     while (nz--) sum -= *v++ * tmp[*vi++ ];
719     tmp[i] = sum*aa[a->diag[i]];
720     x[*c--] += tmp[i];
721   }
722 
723   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
724   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
725   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
726   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
727   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
728 
729   PetscFunctionReturn(0);
730 }
731 /* -------------------------------------------------------------------*/
732 #undef __FUNCT__
733 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
734 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
735 {
736   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
737   IS             iscol = a->col,isrow = a->row;
738   PetscErrorCode ierr;
739   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
740   PetscInt       nz,*rout,*cout,*diag = a->diag;
741   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
742 
743   PetscFunctionBegin;
744   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
745   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
746   tmp  = a->solve_work;
747 
748   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
749   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
750 
751   /* copy the b into temp work space according to permutation */
752   for (i=0; i<n; i++) tmp[i] = b[c[i]];
753 
754   /* forward solve the U^T */
755   for (i=0; i<n; i++) {
756     v   = aa + diag[i] ;
757     vi  = aj + diag[i] + 1;
758     nz  = ai[i+1] - diag[i] - 1;
759     s1  = tmp[i];
760     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
761     while (nz--) {
762       tmp[*vi++ ] -= (*v++)*s1;
763     }
764     tmp[i] = s1;
765   }
766 
767   /* backward solve the L^T */
768   for (i=n-1; i>=0; i--){
769     v   = aa + diag[i] - 1 ;
770     vi  = aj + diag[i] - 1 ;
771     nz  = diag[i] - ai[i];
772     s1  = tmp[i];
773     while (nz--) {
774       tmp[*vi-- ] -= (*v--)*s1;
775     }
776   }
777 
778   /* copy tmp into x according to permutation */
779   for (i=0; i<n; i++) x[r[i]] = tmp[i];
780 
781   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
782   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
783   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
784   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
785 
786   ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr);
787   PetscFunctionReturn(0);
788 }
789 
790 #undef __FUNCT__
791 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
792 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
793 {
794   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
795   IS             iscol = a->col,isrow = a->row;
796   PetscErrorCode ierr;
797   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
798   PetscInt       nz,*rout,*cout,*diag = a->diag;
799   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
800 
801   PetscFunctionBegin;
802   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
803 
804   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
805   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
806   tmp = a->solve_work;
807 
808   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
809   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
810 
811   /* copy the b into temp work space according to permutation */
812   for (i=0; i<n; i++) tmp[i] = b[c[i]];
813 
814   /* forward solve the U^T */
815   for (i=0; i<n; i++) {
816     v   = aa + diag[i] ;
817     vi  = aj + diag[i] + 1;
818     nz  = ai[i+1] - diag[i] - 1;
819     tmp[i] *= *v++;
820     while (nz--) {
821       tmp[*vi++ ] -= (*v++)*tmp[i];
822     }
823   }
824 
825   /* backward solve the L^T */
826   for (i=n-1; i>=0; i--){
827     v   = aa + diag[i] - 1 ;
828     vi  = aj + diag[i] - 1 ;
829     nz  = diag[i] - ai[i];
830     while (nz--) {
831       tmp[*vi-- ] -= (*v--)*tmp[i];
832     }
833   }
834 
835   /* copy tmp into x according to permutation */
836   for (i=0; i<n; i++) x[r[i]] += tmp[i];
837 
838   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
839   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
840   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
841   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
842 
843   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
844   PetscFunctionReturn(0);
845 }
846 /* ----------------------------------------------------------------*/
847 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
851 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
852 {
853   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
854   IS                 isicol;
855   PetscErrorCode     ierr;
856   PetscInt           *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
857   PetscInt           *bi,*cols,nnz,*cols_lvl;
858   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
859   PetscInt           i,levels,diagonal_fill;
860   PetscTruth         col_identity,row_identity;
861   PetscReal          f;
862   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
863   PetscBT            lnkbt;
864   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
865   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
866   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
867 
868   PetscFunctionBegin;
869   f             = info->fill;
870   levels        = (PetscInt)info->levels;
871   diagonal_fill = (PetscInt)info->diagonal_fill;
872   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
873 
874   /* special case that simply copies fill pattern */
875   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
876   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
877   if (!levels && row_identity && col_identity) {
878     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
879     (*fact)->factor = FACTOR_LU;
880     b               = (Mat_SeqAIJ*)(*fact)->data;
881     if (!b->diag) {
882       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
883     }
884     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
885     b->row              = isrow;
886     b->col              = iscol;
887     b->icol             = isicol;
888     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
889     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
890     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
891     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
892     PetscFunctionReturn(0);
893   }
894 
895   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
896   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
897 
898   /* get new row pointers */
899   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
900   bi[0] = 0;
901   /* bdiag is location of diagonal in factor */
902   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
903   bdiag[0]  = 0;
904 
905   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
906   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
907 
908   /* create a linked list for storing column indices of the active row */
909   nlnk = n + 1;
910   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
911 
912   /* initial FreeSpace size is f*(ai[n]+1) */
913   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
914   current_space = free_space;
915   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
916   current_space_lvl = free_space_lvl;
917 
918   for (i=0; i<n; i++) {
919     nzi = 0;
920     /* copy current row into linked list */
921     nnz  = ai[r[i]+1] - ai[r[i]];
922     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
923     cols = aj + ai[r[i]];
924     lnk[i] = -1; /* marker to indicate if diagonal exists */
925     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
926     nzi += nlnk;
927 
928     /* make sure diagonal entry is included */
929     if (diagonal_fill && lnk[i] == -1) {
930       fm = n;
931       while (lnk[fm] < i) fm = lnk[fm];
932       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
933       lnk[fm]    = i;
934       lnk_lvl[i] = 0;
935       nzi++; dcount++;
936     }
937 
938     /* add pivot rows into the active row */
939     nzbd = 0;
940     prow = lnk[n];
941     while (prow < i) {
942       nnz      = bdiag[prow];
943       cols     = bj_ptr[prow] + nnz + 1;
944       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
945       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
946       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
947       nzi += nlnk;
948       prow = lnk[prow];
949       nzbd++;
950     }
951     bdiag[i] = nzbd;
952     bi[i+1]  = bi[i] + nzi;
953 
954     /* if free space is not available, make more free space */
955     if (current_space->local_remaining<nzi) {
956       nnz = nzi*(n - i); /* estimated and max additional space needed */
957       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
958       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
959       reallocs++;
960     }
961 
962     /* copy data into free_space and free_space_lvl, then initialize lnk */
963     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
964     bj_ptr[i]    = current_space->array;
965     bjlvl_ptr[i] = current_space_lvl->array;
966 
967     /* make sure the active row i has diagonal entry */
968     if (*(bj_ptr[i]+bdiag[i]) != i) {
969       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
970     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
971     }
972 
973     current_space->array           += nzi;
974     current_space->local_used      += nzi;
975     current_space->local_remaining -= nzi;
976     current_space_lvl->array           += nzi;
977     current_space_lvl->local_used      += nzi;
978     current_space_lvl->local_remaining -= nzi;
979   }
980 
981   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
982   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
983 
984   /* destroy list of free space and other temporary arrays */
985   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
986   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
987   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
988   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
989   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
990 
991 #if defined(PETSC_USE_DEBUG)
992   {
993     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
994     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr);
995     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr);
996     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr);
997     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
998     if (diagonal_fill) {
999       ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr);
1000     }
1001   }
1002 #endif
1003 
1004   /* put together the new matrix */
1005   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
1006   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
1007   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1008   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1009   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1010   b = (Mat_SeqAIJ*)(*fact)->data;
1011   b->freedata     = PETSC_TRUE;
1012   b->singlemalloc = PETSC_FALSE;
1013   len = (bi[n] )*sizeof(PetscScalar);
1014   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1015   b->j          = bj;
1016   b->i          = bi;
1017   for (i=0; i<n; i++) bdiag[i] += bi[i];
1018   b->diag       = bdiag;
1019   b->ilen       = 0;
1020   b->imax       = 0;
1021   b->row        = isrow;
1022   b->col        = iscol;
1023   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1024   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1025   b->icol       = isicol;
1026   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1027   /* In b structure:  Free imax, ilen, old a, old j.
1028      Allocate bdiag, solve_work, new a, new j */
1029   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1030   b->maxnz             = b->nz = bi[n] ;
1031   (*fact)->factor = FACTOR_LU;
1032   (*fact)->info.factor_mallocs    = reallocs;
1033   (*fact)->info.fill_ratio_given  = f;
1034   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1035 
1036   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1037   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1038 
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 #include "src/mat/impls/sbaij/seq/sbaij.h"
1043 #undef __FUNCT__
1044 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1045 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1046 {
1047   Mat            C = *B;
1048   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1049   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1050   IS             ip=b->row;
1051   PetscErrorCode ierr;
1052   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1053   PetscInt       *ai=a->i,*aj=a->j;
1054   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1055   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1056   PetscReal      zeropivot,rs,shiftnz;
1057   PetscReal      shiftpd;
1058   ChShift_Ctx    sctx;
1059   PetscInt       newshift;
1060 
1061   PetscFunctionBegin;
1062   shiftnz   = info->shiftnz;
1063   shiftpd   = info->shiftpd;
1064   zeropivot = info->zeropivot;
1065 
1066   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1067 
1068   /* initialization */
1069   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1070   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1071   jl   = il + mbs;
1072   rtmp = (MatScalar*)(jl + mbs);
1073 
1074   sctx.shift_amount = 0;
1075   sctx.nshift       = 0;
1076   do {
1077     sctx.chshift = PETSC_FALSE;
1078     for (i=0; i<mbs; i++) {
1079       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1080     }
1081 
1082     for (k = 0; k<mbs; k++){
1083       bval = ba + bi[k];
1084       /* initialize k-th row by the perm[k]-th row of A */
1085       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1086       for (j = jmin; j < jmax; j++){
1087         col = rip[aj[j]];
1088         if (col >= k){ /* only take upper triangular entry */
1089           rtmp[col] = aa[j];
1090           *bval++  = 0.0; /* for in-place factorization */
1091         }
1092       }
1093       /* shift the diagonal of the matrix */
1094       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1095 
1096       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1097       dk = rtmp[k];
1098       i = jl[k]; /* first row to be added to k_th row  */
1099 
1100       while (i < k){
1101         nexti = jl[i]; /* next row to be added to k_th row */
1102 
1103         /* compute multiplier, update diag(k) and U(i,k) */
1104         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1105         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1106         dk += uikdi*ba[ili];
1107         ba[ili] = uikdi; /* -U(i,k) */
1108 
1109         /* add multiple of row i to k-th row */
1110         jmin = ili + 1; jmax = bi[i+1];
1111         if (jmin < jmax){
1112           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1113           /* update il and jl for row i */
1114           il[i] = jmin;
1115           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1116         }
1117         i = nexti;
1118       }
1119 
1120       /* shift the diagonals when zero pivot is detected */
1121       /* compute rs=sum of abs(off-diagonal) */
1122       rs   = 0.0;
1123       jmin = bi[k]+1;
1124       nz   = bi[k+1] - jmin;
1125       if (nz){
1126         bcol = bj + jmin;
1127         while (nz--){
1128           rs += PetscAbsScalar(rtmp[*bcol]);
1129           bcol++;
1130         }
1131       }
1132 
1133       sctx.rs = rs;
1134       sctx.pv = dk;
1135       ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
1136       if (newshift == 1){
1137         break;    /* sctx.shift_amount is updated */
1138       } else if (newshift == -1){
1139         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1140       }
1141 
1142       /* copy data into U(k,:) */
1143       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1144       jmin = bi[k]+1; jmax = bi[k+1];
1145       if (jmin < jmax) {
1146         for (j=jmin; j<jmax; j++){
1147           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1148         }
1149         /* add the k-th row into il and jl */
1150         il[k] = jmin;
1151         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1152       }
1153     }
1154   } while (sctx.chshift);
1155   ierr = PetscFree(il);CHKERRQ(ierr);
1156 
1157   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1158   C->factor       = FACTOR_CHOLESKY;
1159   C->assembled    = PETSC_TRUE;
1160   C->preallocated = PETSC_TRUE;
1161   ierr = PetscLogFlops(C->m);CHKERRQ(ierr);
1162   if (sctx.nshift){
1163     if (shiftnz) {
1164       ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
1165     } else if (shiftpd) {
1166       ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
1167     }
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1174 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1175 {
1176   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1177   Mat_SeqSBAIJ       *b;
1178   Mat                B;
1179   PetscErrorCode     ierr;
1180   PetscTruth         perm_identity;
1181   PetscInt           reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1182   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1183   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1184   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1185   PetscReal          fill=info->fill,levels=info->levels;
1186   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1187   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1188   PetscBT            lnkbt;
1189 
1190   PetscFunctionBegin;
1191   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1192   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1193 
1194   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1195   ui[0] = 0;
1196 
1197   /* special case that simply copies fill pattern */
1198   if (!levels && perm_identity) {
1199     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1200     for (i=0; i<am; i++) {
1201       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1202     }
1203     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1204     cols = uj;
1205     for (i=0; i<am; i++) {
1206       aj    = a->j + a->diag[i];
1207       ncols = ui[i+1] - ui[i];
1208       for (j=0; j<ncols; j++) *cols++ = *aj++;
1209     }
1210   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1211     /* initialization */
1212     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1213 
1214     /* jl: linked list for storing indices of the pivot rows
1215        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1216     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1217     il         = jl + am;
1218     uj_ptr     = (PetscInt**)(il + am);
1219     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1220     for (i=0; i<am; i++){
1221       jl[i] = am; il[i] = 0;
1222     }
1223 
1224     /* create and initialize a linked list for storing column indices of the active row k */
1225     nlnk = am + 1;
1226     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1227 
1228     /* initial FreeSpace size is fill*(ai[am]+1) */
1229     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1230     current_space = free_space;
1231     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1232     current_space_lvl = free_space_lvl;
1233 
1234     for (k=0; k<am; k++){  /* for each active row k */
1235       /* initialize lnk by the column indices of row rip[k] of A */
1236       nzk   = 0;
1237       ncols = ai[rip[k]+1] - ai[rip[k]];
1238       ncols_upper = 0;
1239       for (j=0; j<ncols; j++){
1240         i = *(aj + ai[rip[k]] + j);
1241         if (rip[i] >= k){ /* only take upper triangular entry */
1242           ajtmp[ncols_upper] = i;
1243           ncols_upper++;
1244         }
1245       }
1246       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1247       nzk += nlnk;
1248 
1249       /* update lnk by computing fill-in for each pivot row to be merged in */
1250       prow = jl[k]; /* 1st pivot row */
1251 
1252       while (prow < k){
1253         nextprow = jl[prow];
1254 
1255         /* merge prow into k-th row */
1256         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1257         jmax = ui[prow+1];
1258         ncols = jmax-jmin;
1259         i     = jmin - ui[prow];
1260         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1261         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1262         j     = *(uj - 1);
1263         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1264         nzk += nlnk;
1265 
1266         /* update il and jl for prow */
1267         if (jmin < jmax){
1268           il[prow] = jmin;
1269           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1270         }
1271         prow = nextprow;
1272       }
1273 
1274       /* if free space is not available, make more free space */
1275       if (current_space->local_remaining<nzk) {
1276         i = am - k + 1; /* num of unfactored rows */
1277         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1278         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1279         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1280         reallocs++;
1281       }
1282 
1283       /* copy data into free_space and free_space_lvl, then initialize lnk */
1284       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1285 
1286       /* add the k-th row into il and jl */
1287       if (nzk > 1){
1288         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1289         jl[k] = jl[i]; jl[i] = k;
1290         il[k] = ui[k] + 1;
1291       }
1292       uj_ptr[k]     = current_space->array;
1293       uj_lvl_ptr[k] = current_space_lvl->array;
1294 
1295       current_space->array           += nzk;
1296       current_space->local_used      += nzk;
1297       current_space->local_remaining -= nzk;
1298 
1299       current_space_lvl->array           += nzk;
1300       current_space_lvl->local_used      += nzk;
1301       current_space_lvl->local_remaining -= nzk;
1302 
1303       ui[k+1] = ui[k] + nzk;
1304     }
1305 
1306 #if defined(PETSC_USE_DEBUG)
1307     if (ai[am] != 0) {
1308       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1309       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr);
1310       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr);
1311       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr);
1312     } else {
1313       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr);
1314     }
1315 #endif
1316 
1317     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1318     ierr = PetscFree(jl);CHKERRQ(ierr);
1319     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1320 
1321     /* destroy list of free space and other temporary array(s) */
1322     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1323     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1324     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1325     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1326 
1327   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1328 
1329   /* put together the new matrix in MATSEQSBAIJ format */
1330   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1331   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1332   B = *fact;
1333   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1334   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1335 
1336   b    = (Mat_SeqSBAIJ*)B->data;
1337   b->singlemalloc = PETSC_FALSE;
1338   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1339   b->j    = uj;
1340   b->i    = ui;
1341   b->diag = 0;
1342   b->ilen = 0;
1343   b->imax = 0;
1344   b->row  = perm;
1345   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1346   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1347   b->icol = perm;
1348   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1349   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1350   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1351   b->maxnz = b->nz = ui[am];
1352 
1353   B->factor                 = FACTOR_CHOLESKY;
1354   B->info.factor_mallocs    = reallocs;
1355   B->info.fill_ratio_given  = fill;
1356   if (ai[am] != 0) {
1357     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1358   } else {
1359     B->info.fill_ratio_needed = 0.0;
1360   }
1361   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1362   if (perm_identity){
1363     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1364     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1371 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1372 {
1373   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1374   Mat_SeqSBAIJ       *b;
1375   Mat                B;
1376   PetscErrorCode     ierr;
1377   PetscTruth         perm_identity;
1378   PetscReal          fill = info->fill;
1379   PetscInt           *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1380   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1381   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1382   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1383   PetscBT            lnkbt;
1384   IS                 iperm;
1385 
1386   PetscFunctionBegin;
1387   /* check whether perm is the identity mapping */
1388   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1389   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1390 
1391   if (!perm_identity){
1392     /* check if perm is symmetric! */
1393     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1394     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1395     for (i=0; i<am; i++) {
1396       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1397     }
1398     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1399     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1400   }
1401 
1402   /* initialization */
1403   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1404   ui[0] = 0;
1405 
1406   /* jl: linked list for storing indices of the pivot rows
1407      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1408   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1409   il     = jl + am;
1410   cols   = il + am;
1411   ui_ptr = (PetscInt**)(cols + am);
1412   for (i=0; i<am; i++){
1413     jl[i] = am; il[i] = 0;
1414   }
1415 
1416   /* create and initialize a linked list for storing column indices of the active row k */
1417   nlnk = am + 1;
1418   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1419 
1420   /* initial FreeSpace size is fill*(ai[am]+1) */
1421   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1422   current_space = free_space;
1423 
1424   for (k=0; k<am; k++){  /* for each active row k */
1425     /* initialize lnk by the column indices of row rip[k] of A */
1426     nzk   = 0;
1427     ncols = ai[rip[k]+1] - ai[rip[k]];
1428     ncols_upper = 0;
1429     for (j=0; j<ncols; j++){
1430       i = rip[*(aj + ai[rip[k]] + j)];
1431       if (i >= k){ /* only take upper triangular entry */
1432         cols[ncols_upper] = i;
1433         ncols_upper++;
1434       }
1435     }
1436     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1437     nzk += nlnk;
1438 
1439     /* update lnk by computing fill-in for each pivot row to be merged in */
1440     prow = jl[k]; /* 1st pivot row */
1441 
1442     while (prow < k){
1443       nextprow = jl[prow];
1444       /* merge prow into k-th row */
1445       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1446       jmax = ui[prow+1];
1447       ncols = jmax-jmin;
1448       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1449       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1450       nzk += nlnk;
1451 
1452       /* update il and jl for prow */
1453       if (jmin < jmax){
1454         il[prow] = jmin;
1455         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1456       }
1457       prow = nextprow;
1458     }
1459 
1460     /* if free space is not available, make more free space */
1461     if (current_space->local_remaining<nzk) {
1462       i = am - k + 1; /* num of unfactored rows */
1463       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1464       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1465       reallocs++;
1466     }
1467 
1468     /* copy data into free space, then initialize lnk */
1469     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1470 
1471     /* add the k-th row into il and jl */
1472     if (nzk-1 > 0){
1473       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1474       jl[k] = jl[i]; jl[i] = k;
1475       il[k] = ui[k] + 1;
1476     }
1477     ui_ptr[k] = current_space->array;
1478     current_space->array           += nzk;
1479     current_space->local_used      += nzk;
1480     current_space->local_remaining -= nzk;
1481 
1482     ui[k+1] = ui[k] + nzk;
1483   }
1484 
1485 #if defined(PETSC_USE_DEBUG)
1486   if (ai[am] != 0) {
1487     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1488     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr);
1489     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr);
1490     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr);
1491   } else {
1492      ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr);
1493   }
1494 #endif
1495 
1496   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1497   ierr = PetscFree(jl);CHKERRQ(ierr);
1498 
1499   /* destroy list of free space and other temporary array(s) */
1500   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1501   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1502   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1503 
1504   /* put together the new matrix in MATSEQSBAIJ format */
1505   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1506   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1507   B    = *fact;
1508   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1509   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1510 
1511   b = (Mat_SeqSBAIJ*)B->data;
1512   b->singlemalloc = PETSC_FALSE;
1513   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1514   b->j    = uj;
1515   b->i    = ui;
1516   b->diag = 0;
1517   b->ilen = 0;
1518   b->imax = 0;
1519   b->row  = perm;
1520   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1521   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1522   b->icol = perm;
1523   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1524   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1525   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1526   b->maxnz = b->nz = ui[am];
1527 
1528   B->factor                 = FACTOR_CHOLESKY;
1529   B->info.factor_mallocs    = reallocs;
1530   B->info.fill_ratio_given  = fill;
1531   if (ai[am] != 0) {
1532     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1533   } else {
1534     B->info.fill_ratio_needed = 0.0;
1535   }
1536   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1537   if (perm_identity){
1538     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1539     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1540   }
1541   PetscFunctionReturn(0);
1542 }
1543