xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 71c2f376a8ea5b67e6a5bb4815321c85a96b4c48)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 #include "petscbt.h"
6 #include "src/mat/utils/freespace.h"
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
11 {
12   PetscFunctionBegin;
13 
14   SETERRQ(PETSC_ERR_SUP,"Code not written");
15 #if !defined(PETSC_USE_DEBUG)
16   PetscFunctionReturn(0);
17 #endif
18 }
19 
20 
21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
22 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
23 
24 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
25 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
26 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   PetscInt       *c,*r,*ic,i,n = A->m;
64   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
65   PetscInt       *ordcol,*iwk,*iperm,*jw;
66   PetscInt       jmax,lfill,job,*o_i,*o_j;
67   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
68   PetscReal      af;
69 
70   PetscFunctionBegin;
71 
72   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
73   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
74   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
75   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
76   lfill   = (PetscInt)(info->dtcount/2.0);
77   jmax    = (PetscInt)(info->fill*a->nz);
78 
79 
80   /* ------------------------------------------------------------
81      If reorder=.TRUE., then the original matrix has to be
82      reordered to reflect the user selected ordering scheme, and
83      then de-reordered so it is in it's original format.
84      Because Saad's dperm() is NOT in place, we have to copy
85      the original matrix and allocate more storage. . .
86      ------------------------------------------------------------
87   */
88 
89   /* set reorder to true if either isrow or iscol is not identity */
90   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
91   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
92   reorder = PetscNot(reorder);
93 
94 
95   /* storage for ilu factor */
96   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
97   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
99   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
100 
101   /* ------------------------------------------------------------
102      Make sure that everything is Fortran formatted (1-Based)
103      ------------------------------------------------------------
104   */
105   for (i=old_i[0];i<old_i[n];i++) {
106     old_j[i]++;
107   }
108   for(i=0;i<n+1;i++) {
109     old_i[i]++;
110   };
111 
112 
113   if (reorder) {
114     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
115     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
116     for(i=0;i<n;i++) {
117       r[i]  = r[i]+1;
118       c[i]  = c[i]+1;
119     }
120     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
121     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
122     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
123     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124     for (i=0;i<n;i++) {
125       r[i]  = r[i]-1;
126       c[i]  = c[i]-1;
127     }
128     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
129     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
130     o_a = old_a2;
131     o_j = old_j2;
132     o_i = old_i2;
133   } else {
134     o_a = old_a;
135     o_j = old_j;
136     o_i = old_i;
137   }
138 
139   /* ------------------------------------------------------------
140      Call Saad's ilutp() routine to generate the factorization
141      ------------------------------------------------------------
142   */
143 
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
145   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
146   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
147 
148   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149   if (sierr) {
150     switch (sierr) {
151       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
152       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
153       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157     }
158   }
159 
160   ierr = PetscFree(w);CHKERRQ(ierr);
161   ierr = PetscFree(jw);CHKERRQ(ierr);
162 
163   /* ------------------------------------------------------------
164      Saad's routine gives the result in Modified Sparse Row (msr)
165      Convert to Compressed Sparse Row format (csr)
166      ------------------------------------------------------------
167   */
168 
169   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
170   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
171 
172   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
173 
174   ierr = PetscFree(iwk);CHKERRQ(ierr);
175   ierr = PetscFree(wk);CHKERRQ(ierr);
176 
177   if (reorder) {
178     ierr = PetscFree(old_a2);CHKERRQ(ierr);
179     ierr = PetscFree(old_j2);CHKERRQ(ierr);
180     ierr = PetscFree(old_i2);CHKERRQ(ierr);
181   } else {
182     /* fix permutation of old_j that the factorization introduced */
183     for (i=old_i[0]; i<old_i[n]; i++) {
184       old_j[i-1] = iperm[old_j[i-1]-1];
185     }
186   }
187 
188   /* get rid of the shift to indices starting at 1 */
189   for (i=0; i<n+1; i++) {
190     old_i[i]--;
191   }
192   for (i=old_i[0];i<old_i[n];i++) {
193     old_j[i]--;
194   }
195 
196   /* Make the factored matrix 0-based */
197   for (i=0; i<n+1; i++) {
198     new_i[i]--;
199   }
200   for (i=new_i[0];i<new_i[n];i++) {
201     new_j[i]--;
202   }
203 
204   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205   /*-- permute the right-hand-side and solution vectors           --*/
206   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
207   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
208   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
209   for(i=0; i<n; i++) {
210     ordcol[i] = ic[iperm[i]-1];
211   };
212   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
213   ierr = ISDestroy(isicol);CHKERRQ(ierr);
214 
215   ierr = PetscFree(iperm);CHKERRQ(ierr);
216 
217   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
218   ierr = PetscFree(ordcol);CHKERRQ(ierr);
219 
220   /*----- put together the new matrix -----*/
221 
222   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
223   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
224   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
225   (*fact)->factor    = FACTOR_LU;
226   (*fact)->assembled = PETSC_TRUE;
227 
228   b = (Mat_SeqAIJ*)(*fact)->data;
229   ierr = PetscFree(b->imax);CHKERRQ(ierr);
230   b->sorted        = PETSC_FALSE;
231   b->singlemalloc  = PETSC_FALSE;
232   /* the next line frees the default space generated by the MatCreate() */
233   ierr             = PetscFree(b->a);CHKERRQ(ierr);
234   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
235   b->a             = new_a;
236   b->j             = new_j;
237   b->i             = new_i;
238   b->ilen          = 0;
239   b->imax          = 0;
240   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241   b->row           = isirow;
242   b->col           = iscolf;
243   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
244   b->maxnz = b->nz = new_i[n];
245   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
246   (*fact)->info.factor_mallocs = 0;
247 
248   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
249 
250   af = ((double)b->nz)/((double)a->nz) + .001;
251   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
255 
256   /* Call parent MatILUDTFactor here */
257 
258   PetscFunctionReturn(0);
259 #endif
260 }
261 
262 #undef __FUNCT__
263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
265 {
266   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
267   IS             isicol;
268   PetscErrorCode ierr;
269   PetscInt       *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
270   PetscInt       *bi,*bj,*ajtmp;
271   PetscInt       *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
272   PetscReal      f;
273   PetscInt       nlnk,*lnk,k,*cols,**bi_ptr;
274   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
275   PetscBT        lnkbt;
276 
277   PetscFunctionBegin;
278   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
279   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
280   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
281   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
282 
283   /* get new row pointers */
284   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
285   bi[0] = 0;
286 
287   /* bdiag is location of diagonal in factor */
288   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
289   bdiag[0] = 0;
290 
291   /* linked list for storing column indices of the active row */
292   nlnk = n + 1;
293   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
294 
295   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr);
296   im     = cols + n;
297   bi_ptr = (PetscInt**)(im + n);
298 
299   /* initial FreeSpace size is f*(ai[n]+1) */
300   f = info->fill;
301   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
302   current_space = free_space;
303 
304   for (i=0; i<n; i++) {
305     /* copy previous fill into linked list */
306     nzi = 0;
307     nnz = ai[r[i]+1] - ai[r[i]];
308     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
309     ajtmp = aj + ai[r[i]];
310     for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */
311     ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
312     nzi += nlnk;
313 
314     /* add pivot rows into linked list */
315     row = lnk[n];
316     while (row < i) {
317       nzbd    = bdiag[row] - bi[row] + 1;
318       ajtmp   = bi_ptr[row] + nzbd;
319       nnz     = im[row] - nzbd; /* num of columns with row<indices<=i */
320       im[row] = nzbd;
321       ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr);
322       nzi     += nlnk;
323       im[row] += nzbd;  /* update im[row]: num of cols with index<=i */
324 
325       row = lnk[row];
326     }
327 
328     bi[i+1] = bi[i] + nzi;
329     im[i]   = nzi;
330 
331     /* mark bdiag */
332     nzbd = 0;
333     nnz  = nzi;
334     k    = lnk[n];
335     while (nnz-- && k < i){
336       nzbd++;
337       k = lnk[k];
338     }
339     bdiag[i] = bi[i] + nzbd;
340 
341     /* if free space is not available, make more free space */
342     if (current_space->local_remaining<nzi) {
343       nnz = (n - i)*nzi; /* estimated and max additional space needed */
344       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
345       reallocs++;
346     }
347 
348     /* copy data into free space, then initialize lnk */
349     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
350     bi_ptr[i] = current_space->array;
351     current_space->array           += nzi;
352     current_space->local_used      += nzi;
353     current_space->local_remaining -= nzi;
354   }
355   if (ai[n] != 0) {
356     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
357     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
358     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
359     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
360     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
361   } else {
362     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
363   }
364 
365   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
366   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
367 
368   /* destroy list of free space and other temporary array(s) */
369   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
370   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
371   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
372   ierr = PetscFree(cols);CHKERRQ(ierr);
373 
374   /* put together the new matrix */
375   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
376   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
377   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
378   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
379   b    = (Mat_SeqAIJ*)(*B)->data;
380   ierr = PetscFree(b->imax);CHKERRQ(ierr);
381   b->singlemalloc = PETSC_FALSE;
382   /* the next line frees the default space generated by the Create() */
383   ierr = PetscFree(b->a);CHKERRQ(ierr);
384   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
385   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
386   b->j          = bj;
387   b->i          = bi;
388   b->diag       = bdiag;
389   b->ilen       = 0;
390   b->imax       = 0;
391   b->row        = isrow;
392   b->col        = iscol;
393   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
394   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
395   b->icol       = isicol;
396   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
397 
398   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
399   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
400   b->maxnz = b->nz = bi[n] ;
401 
402   (*B)->factor                 =  FACTOR_LU;
403   (*B)->info.factor_mallocs    = reallocs;
404   (*B)->info.fill_ratio_given  = f;
405 
406   if (ai[n] != 0) {
407     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
408   } else {
409     (*B)->info.fill_ratio_needed = 0.0;
410   }
411   /* Call parent MatLUFactorSymbolic here */
412   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
413   PetscFunctionReturn(0);
414 }
415 
416 /* ----------------------------------------------------------- */
417 #undef __FUNCT__
418 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
419 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
420 {
421   Mat            C=*B;
422   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
423   IS             isrow = b->row,isicol = b->icol;
424   PetscErrorCode ierr;
425   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
426   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
427   PetscInt       *diag_offset = b->diag,diag,*pj;
428   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
429   PetscReal      zeropivot,rs,d,shift_nz;
430   PetscReal      row_shift,shift_top=0.;
431   PetscTruth     shift_pd;
432   LUShift_Ctx    sctx;
433   PetscInt       newshift;
434 
435   PetscFunctionBegin;
436   shift_nz  = info->shiftnz;
437   shift_pd  = info->shiftpd;
438   zeropivot = info->zeropivot;
439 
440   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
441   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
442   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
443   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
444   rtmps = rtmp; ics = ic;
445 
446   if (!a->diag) {
447     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
448   }
449   /* if both shift schemes are chosen by user, only use shift_pd */
450   if (shift_pd && shift_nz) shift_nz = 0.0;
451   if (shift_pd) { /* set shift_top=max{row_shift} */
452     PetscInt *aai = a->i,*ddiag = a->diag;
453     shift_top = 0;
454     for (i=0; i<n; i++) {
455       d = PetscAbsScalar((a->a)[ddiag[i]]);
456       /* calculate amt of shift needed for this row */
457       if (d<=0) {
458         row_shift = 0;
459       } else {
460         row_shift = -2*d;
461       }
462       v  = a->a+aai[i];
463       nz = aai[i+1] - aai[i];
464       for (j=0; j<nz; j++)
465 	row_shift += PetscAbsScalar(v[j]);
466       if (row_shift>shift_top) shift_top = row_shift;
467     }
468   }
469 
470   sctx.shift_amount = 0;
471   sctx.shift_top    = shift_top;
472   sctx.nshift       = 0;
473   sctx.nshift_max   = 5;
474   sctx.shift_lo     = 0.;
475   sctx.shift_hi     = 1.;
476   do {
477     sctx.lushift = PETSC_FALSE;
478     for (i=0; i<n; i++){
479       nz    = bi[i+1] - bi[i];
480       bjtmp = bj + bi[i];
481       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
482 
483       /* load in initial (unfactored row) */
484       nz    = a->i[r[i]+1] - a->i[r[i]];
485       ajtmp = a->j + a->i[r[i]];
486       v     = a->a + a->i[r[i]];
487       for (j=0; j<nz; j++) {
488         rtmp[ics[ajtmp[j]]] = v[j];
489       }
490       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
491 
492       row = *bjtmp++;
493       while  (row < i) {
494         pc = rtmp + row;
495         if (*pc != 0.0) {
496           pv         = b->a + diag_offset[row];
497           pj         = b->j + diag_offset[row] + 1;
498           multiplier = *pc / *pv++;
499           *pc        = multiplier;
500           nz         = bi[row+1] - diag_offset[row] - 1;
501           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
502           PetscLogFlops(2*nz);
503         }
504         row = *bjtmp++;
505       }
506       /* finished row so stick it into b->a */
507       pv   = b->a + bi[i] ;
508       pj   = b->j + bi[i] ;
509       nz   = bi[i+1] - bi[i];
510       diag = diag_offset[i] - bi[i];
511       rs   = 0.0;
512       for (j=0; j<nz; j++) {
513         pv[j] = rtmps[pj[j]];
514         if (j != diag) rs += PetscAbsScalar(pv[j]);
515       }
516 
517       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
518       sctx.rs  = rs;
519       sctx.pv  = pv[diag];
520       ierr = Mat_LUCheckShift(info,&sctx,&newshift);CHKERRQ(ierr);
521       if (newshift == 1){
522         break;    /* sctx.shift_amount is updated */
523       } else if (newshift == -1){
524         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
525       }
526     }
527 
528     if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
529       /*
530        * if no shift in this attempt & shifting & started shifting & can refine,
531        * then try lower shift
532        */
533       sctx.shift_hi       = info->shift_fraction;
534       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
535       sctx.shift_amount   = info->shift_fraction * sctx.shift_top;
536       sctx.lushift        = PETSC_TRUE;
537       sctx.nshift++;
538     }
539   } while (sctx.lushift);
540 
541   /* invert diagonal entries for simplier triangular solves */
542   for (i=0; i<n; i++) {
543     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
544   }
545 
546   ierr = PetscFree(rtmp);CHKERRQ(ierr);
547   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
548   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
549   C->factor = FACTOR_LU;
550   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
551   C->assembled = PETSC_TRUE;
552   PetscLogFlops(C->n);
553   if (sctx.nshift){
554     if (shift_nz) {
555       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
556     } else if (shift_pd) {
557       b->lu_shift_fraction = info->shift_fraction;
558       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift);
559     }
560   }
561   PetscFunctionReturn(0);
562 }
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
566 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
567 {
568   PetscFunctionBegin;
569   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
570   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
571   PetscFunctionReturn(0);
572 }
573 
574 
575 /* ----------------------------------------------------------- */
576 #undef __FUNCT__
577 #define __FUNCT__ "MatLUFactor_SeqAIJ"
578 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
579 {
580   PetscErrorCode ierr;
581   Mat            C;
582 
583   PetscFunctionBegin;
584   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
585   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
586   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
587   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 /* ----------------------------------------------------------- */
591 #undef __FUNCT__
592 #define __FUNCT__ "MatSolve_SeqAIJ"
593 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
594 {
595   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
596   IS             iscol = a->col,isrow = a->row;
597   PetscErrorCode ierr;
598   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
599   PetscInt       nz,*rout,*cout;
600   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
601 
602   PetscFunctionBegin;
603   if (!n) PetscFunctionReturn(0);
604 
605   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
606   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
607   tmp  = a->solve_work;
608 
609   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
610   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
611 
612   /* forward solve the lower triangular */
613   tmp[0] = b[*r++];
614   tmps   = tmp;
615   for (i=1; i<n; i++) {
616     v   = aa + ai[i] ;
617     vi  = aj + ai[i] ;
618     nz  = a->diag[i] - ai[i];
619     sum = b[*r++];
620     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
621     tmp[i] = sum;
622   }
623 
624   /* backward solve the upper triangular */
625   for (i=n-1; i>=0; i--){
626     v   = aa + a->diag[i] + 1;
627     vi  = aj + a->diag[i] + 1;
628     nz  = ai[i+1] - a->diag[i] - 1;
629     sum = tmp[i];
630     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
631     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
632   }
633 
634   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
635   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
636   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
637   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
638   PetscLogFlops(2*a->nz - A->n);
639   PetscFunctionReturn(0);
640 }
641 
642 /* ----------------------------------------------------------- */
643 #undef __FUNCT__
644 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
645 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
646 {
647   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
648   PetscErrorCode ierr;
649   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
650   PetscScalar    *x,*b,*aa = a->a;
651 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
652   PetscInt       adiag_i,i,*vi,nz,ai_i;
653   PetscScalar    *v,sum;
654 #endif
655 
656   PetscFunctionBegin;
657   if (!n) PetscFunctionReturn(0);
658 
659   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
660   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
661 
662 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
663   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
664 #else
665   /* forward solve the lower triangular */
666   x[0] = b[0];
667   for (i=1; i<n; i++) {
668     ai_i = ai[i];
669     v    = aa + ai_i;
670     vi   = aj + ai_i;
671     nz   = adiag[i] - ai_i;
672     sum  = b[i];
673     while (nz--) sum -= *v++ * x[*vi++];
674     x[i] = sum;
675   }
676 
677   /* backward solve the upper triangular */
678   for (i=n-1; i>=0; i--){
679     adiag_i = adiag[i];
680     v       = aa + adiag_i + 1;
681     vi      = aj + adiag_i + 1;
682     nz      = ai[i+1] - adiag_i - 1;
683     sum     = x[i];
684     while (nz--) sum -= *v++ * x[*vi++];
685     x[i]    = sum*aa[adiag_i];
686   }
687 #endif
688   PetscLogFlops(2*a->nz - A->n);
689   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
690   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
696 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
697 {
698   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
699   IS             iscol = a->col,isrow = a->row;
700   PetscErrorCode ierr;
701   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
702   PetscInt       nz,*rout,*cout;
703   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
704 
705   PetscFunctionBegin;
706   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
707 
708   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
709   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
710   tmp  = a->solve_work;
711 
712   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
713   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
714 
715   /* forward solve the lower triangular */
716   tmp[0] = b[*r++];
717   for (i=1; i<n; i++) {
718     v   = aa + ai[i] ;
719     vi  = aj + ai[i] ;
720     nz  = a->diag[i] - ai[i];
721     sum = b[*r++];
722     while (nz--) sum -= *v++ * tmp[*vi++ ];
723     tmp[i] = sum;
724   }
725 
726   /* backward solve the upper triangular */
727   for (i=n-1; i>=0; i--){
728     v   = aa + a->diag[i] + 1;
729     vi  = aj + a->diag[i] + 1;
730     nz  = ai[i+1] - a->diag[i] - 1;
731     sum = tmp[i];
732     while (nz--) sum -= *v++ * tmp[*vi++ ];
733     tmp[i] = sum*aa[a->diag[i]];
734     x[*c--] += tmp[i];
735   }
736 
737   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
738   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
739   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
740   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
741   PetscLogFlops(2*a->nz);
742 
743   PetscFunctionReturn(0);
744 }
745 /* -------------------------------------------------------------------*/
746 #undef __FUNCT__
747 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
748 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
749 {
750   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
751   IS             iscol = a->col,isrow = a->row;
752   PetscErrorCode ierr;
753   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
754   PetscInt       nz,*rout,*cout,*diag = a->diag;
755   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
756 
757   PetscFunctionBegin;
758   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
759   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
760   tmp  = a->solve_work;
761 
762   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
763   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
764 
765   /* copy the b into temp work space according to permutation */
766   for (i=0; i<n; i++) tmp[i] = b[c[i]];
767 
768   /* forward solve the U^T */
769   for (i=0; i<n; i++) {
770     v   = aa + diag[i] ;
771     vi  = aj + diag[i] + 1;
772     nz  = ai[i+1] - diag[i] - 1;
773     s1  = tmp[i];
774     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
775     while (nz--) {
776       tmp[*vi++ ] -= (*v++)*s1;
777     }
778     tmp[i] = s1;
779   }
780 
781   /* backward solve the L^T */
782   for (i=n-1; i>=0; i--){
783     v   = aa + diag[i] - 1 ;
784     vi  = aj + diag[i] - 1 ;
785     nz  = diag[i] - ai[i];
786     s1  = tmp[i];
787     while (nz--) {
788       tmp[*vi-- ] -= (*v--)*s1;
789     }
790   }
791 
792   /* copy tmp into x according to permutation */
793   for (i=0; i<n; i++) x[r[i]] = tmp[i];
794 
795   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
796   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
797   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
798   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
799 
800   PetscLogFlops(2*a->nz-A->n);
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
806 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
807 {
808   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
809   IS             iscol = a->col,isrow = a->row;
810   PetscErrorCode ierr;
811   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
812   PetscInt       nz,*rout,*cout,*diag = a->diag;
813   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
814 
815   PetscFunctionBegin;
816   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
817 
818   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820   tmp = a->solve_work;
821 
822   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
823   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
824 
825   /* copy the b into temp work space according to permutation */
826   for (i=0; i<n; i++) tmp[i] = b[c[i]];
827 
828   /* forward solve the U^T */
829   for (i=0; i<n; i++) {
830     v   = aa + diag[i] ;
831     vi  = aj + diag[i] + 1;
832     nz  = ai[i+1] - diag[i] - 1;
833     tmp[i] *= *v++;
834     while (nz--) {
835       tmp[*vi++ ] -= (*v++)*tmp[i];
836     }
837   }
838 
839   /* backward solve the L^T */
840   for (i=n-1; i>=0; i--){
841     v   = aa + diag[i] - 1 ;
842     vi  = aj + diag[i] - 1 ;
843     nz  = diag[i] - ai[i];
844     while (nz--) {
845       tmp[*vi-- ] -= (*v--)*tmp[i];
846     }
847   }
848 
849   /* copy tmp into x according to permutation */
850   for (i=0; i<n; i++) x[r[i]] += tmp[i];
851 
852   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
853   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
854   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
855   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
856 
857   PetscLogFlops(2*a->nz);
858   PetscFunctionReturn(0);
859 }
860 /* ----------------------------------------------------------------*/
861 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
862 
863 #undef __FUNCT__
864 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
865 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
866 {
867   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
868   IS             isicol;
869   PetscErrorCode ierr;
870   PetscInt       *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
871   PetscInt       *bi,*cols,nnz,*cols_lvl;
872   PetscInt       *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
873   PetscInt       i,levels,diagonal_fill;
874   PetscTruth     col_identity,row_identity;
875   PetscReal      f;
876   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
877   PetscBT        lnkbt;
878   PetscInt       nzi,*bj,**bj_ptr,**bjlvl_ptr;
879   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
880   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
881 
882   PetscFunctionBegin;
883   f             = info->fill;
884   levels        = (PetscInt)info->levels;
885   diagonal_fill = (PetscInt)info->diagonal_fill;
886   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
887 
888   /* special case that simply copies fill pattern */
889   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
890   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
891   if (!levels && row_identity && col_identity) {
892     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
893     (*fact)->factor = FACTOR_LU;
894     b               = (Mat_SeqAIJ*)(*fact)->data;
895     if (!b->diag) {
896       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
897     }
898     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
899     b->row              = isrow;
900     b->col              = iscol;
901     b->icol             = isicol;
902     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
903     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
904     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
905     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
906     PetscFunctionReturn(0);
907   }
908 
909   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
910   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
911 
912   /* get new row pointers */
913   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
914   bi[0] = 0;
915   /* bdiag is location of diagonal in factor */
916   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
917   bdiag[0]  = 0;
918 
919   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
920   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
921 
922   /* create a linked list for storing column indices of the active row */
923   nlnk = n + 1;
924   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
925 
926   /* initial FreeSpace size is f*(ai[n]+1) */
927   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
928   current_space = free_space;
929   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
930   current_space_lvl = free_space_lvl;
931 
932   for (i=0; i<n; i++) {
933     nzi = 0;
934     /* copy current row into linked list */
935     nnz  = ai[r[i]+1] - ai[r[i]];
936     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
937     cols = aj + ai[r[i]];
938     lnk[i] = -1; /* marker to indicate if diagonal exists */
939     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
940     nzi += nlnk;
941 
942     /* make sure diagonal entry is included */
943     if (diagonal_fill && lnk[i] == -1) {
944       fm = n;
945       while (lnk[fm] < i) fm = lnk[fm];
946       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
947       lnk[fm]    = i;
948       lnk_lvl[i] = 0;
949       nzi++; dcount++;
950     }
951 
952     /* add pivot rows into the active row */
953     nzbd = 0;
954     prow = lnk[n];
955     while (prow < i) {
956       nnz      = bdiag[prow];
957       cols     = bj_ptr[prow] + nnz + 1;
958       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
959       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
960       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
961       nzi += nlnk;
962       prow = lnk[prow];
963       nzbd++;
964     }
965     bdiag[i] = nzbd;
966     bi[i+1]  = bi[i] + nzi;
967 
968     /* if free space is not available, make more free space */
969     if (current_space->local_remaining<nzi) {
970       nnz = nzi*(n - i); /* estimated and max additional space needed */
971       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
972       ierr = GetMoreSpace(nnz,&current_space_lvl);CHKERRQ(ierr);
973       reallocs++;
974     }
975 
976     /* copy data into free_space and free_space_lvl, then initialize lnk */
977     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
978     bj_ptr[i]    = current_space->array;
979     bjlvl_ptr[i] = current_space_lvl->array;
980 
981     /* make sure the active row i has diagonal entry */
982     if (*(bj_ptr[i]+bdiag[i]) != i) {
983       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
984     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
985     }
986 
987     current_space->array           += nzi;
988     current_space->local_used      += nzi;
989     current_space->local_remaining -= nzi;
990     current_space_lvl->array           += nzi;
991     current_space_lvl->local_used      += nzi;
992     current_space_lvl->local_remaining -= nzi;
993   }
994 
995   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
996   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
997 
998   /* destroy list of free space and other temporary arrays */
999   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1000   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1001   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1002   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1003   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1004 
1005   {
1006     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1007     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1008     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1009     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1010     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1011     if (diagonal_fill) {
1012       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1013     }
1014   }
1015 
1016   /* put together the new matrix */
1017   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1018   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1019   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1020   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1021   b = (Mat_SeqAIJ*)(*fact)->data;
1022   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1023   b->singlemalloc = PETSC_FALSE;
1024   len = (bi[n] )*sizeof(PetscScalar);
1025   /* the next line frees the default space generated by the Create() */
1026   ierr = PetscFree(b->a);CHKERRQ(ierr);
1027   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1028   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1029   b->j          = bj;
1030   b->i          = bi;
1031   for (i=0; i<n; i++) bdiag[i] += bi[i];
1032   b->diag       = bdiag;
1033   b->ilen       = 0;
1034   b->imax       = 0;
1035   b->row        = isrow;
1036   b->col        = iscol;
1037   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1038   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1039   b->icol       = isicol;
1040   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1041   /* In b structure:  Free imax, ilen, old a, old j.
1042      Allocate bdiag, solve_work, new a, new j */
1043   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1044   b->maxnz             = b->nz = bi[n] ;
1045   (*fact)->factor = FACTOR_LU;
1046   (*fact)->info.factor_mallocs    = reallocs;
1047   (*fact)->info.fill_ratio_given  = f;
1048   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1049 
1050   /* Call parent MatILUFactorSymbolic here */
1051 
1052   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 #include "src/mat/impls/sbaij/seq/sbaij.h"
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1059 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1060 {
1061   Mat            C = *B;
1062   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1063   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1064   IS             ip=b->row;
1065   PetscErrorCode ierr;
1066   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1067   PetscInt       *ai=a->i,*aj=a->j;
1068   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1069   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1070   PetscReal      zeropivot,rs,shiftnz;
1071   PetscTruth     shiftpd;
1072   ChShift_Ctx    sctx;
1073   PetscInt       newshift;
1074 
1075   PetscFunctionBegin;
1076   shiftnz   = info->shiftnz;
1077   shiftpd   = info->shiftpd;
1078   zeropivot = info->zeropivot;
1079 
1080   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1081 
1082   /* initialization */
1083   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1084   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1085   jl   = il + mbs;
1086   rtmp = (MatScalar*)(jl + mbs);
1087 
1088   sctx.shift_amount = 0;
1089   sctx.nshift       = 0;
1090   do {
1091     sctx.chshift = PETSC_FALSE;
1092     for (i=0; i<mbs; i++) {
1093       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1094     }
1095 
1096     for (k = 0; k<mbs; k++){
1097       bval = ba + bi[k];
1098       /* initialize k-th row by the perm[k]-th row of A */
1099       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1100       for (j = jmin; j < jmax; j++){
1101         col = rip[aj[j]];
1102         if (col >= k){ /* only take upper triangular entry */
1103           rtmp[col] = aa[j];
1104           *bval++  = 0.0; /* for in-place factorization */
1105         }
1106       }
1107       /* shift the diagonal of the matrix */
1108       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1109 
1110       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1111       dk = rtmp[k];
1112       i = jl[k]; /* first row to be added to k_th row  */
1113 
1114       while (i < k){
1115         nexti = jl[i]; /* next row to be added to k_th row */
1116 
1117         /* compute multiplier, update diag(k) and U(i,k) */
1118         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1119         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1120         dk += uikdi*ba[ili];
1121         ba[ili] = uikdi; /* -U(i,k) */
1122 
1123         /* add multiple of row i to k-th row */
1124         jmin = ili + 1; jmax = bi[i+1];
1125         if (jmin < jmax){
1126           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1127           /* update il and jl for row i */
1128           il[i] = jmin;
1129           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1130         }
1131         i = nexti;
1132       }
1133 
1134       /* shift the diagonals when zero pivot is detected */
1135       /* compute rs=sum of abs(off-diagonal) */
1136       rs   = 0.0;
1137       jmin = bi[k]+1;
1138       nz   = bi[k+1] - jmin;
1139       if (nz){
1140         bcol = bj + jmin;
1141         while (nz--){
1142           rs += PetscAbsScalar(rtmp[*bcol]);
1143           bcol++;
1144         }
1145       }
1146 
1147       sctx.rs = rs;
1148       sctx.pv = dk;
1149       ierr = Mat_CholeskyCheckShift(info,&sctx,&newshift);CHKERRQ(ierr);
1150       if (newshift == 1){
1151         break;    /* sctx.shift_amount is updated */
1152       } else if (newshift == -1){
1153         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1154       }
1155 
1156       /* copy data into U(k,:) */
1157       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1158       jmin = bi[k]+1; jmax = bi[k+1];
1159       if (jmin < jmax) {
1160         for (j=jmin; j<jmax; j++){
1161           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1162         }
1163         /* add the k-th row into il and jl */
1164         il[k] = jmin;
1165         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1166       }
1167     }
1168   } while (sctx.chshift);
1169   ierr = PetscFree(il);CHKERRQ(ierr);
1170 
1171   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1172   C->factor       = FACTOR_CHOLESKY;
1173   C->assembled    = PETSC_TRUE;
1174   C->preallocated = PETSC_TRUE;
1175   PetscLogFlops(C->m);
1176   if (sctx.nshift){
1177     if (shiftnz) {
1178       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1179     } else if (shiftpd) {
1180       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1181     }
1182   }
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1188 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1189 {
1190   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1191   Mat_SeqSBAIJ   *b;
1192   Mat            B;
1193   PetscErrorCode ierr;
1194   PetscTruth     perm_identity;
1195   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1196   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1197   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1198   PetscInt       ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1199   PetscReal      fill=info->fill,levels=info->levels;
1200   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1201   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1202   PetscBT        lnkbt;
1203 
1204   PetscFunctionBegin;
1205   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1206   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1207 
1208   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1209   ui[0] = 0;
1210 
1211   /* special case that simply copies fill pattern */
1212   if (!levels && perm_identity) {
1213     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1214     for (i=0; i<am; i++) {
1215       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1216     }
1217     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1218     cols = uj;
1219     for (i=0; i<am; i++) {
1220       aj    = a->j + a->diag[i];
1221       ncols = ui[i+1] - ui[i];
1222       for (j=0; j<ncols; j++) *cols++ = *aj++;
1223     }
1224   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1225     /* initialization */
1226     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1227 
1228     /* jl: linked list for storing indices of the pivot rows
1229        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1230     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1231     il         = jl + am;
1232     uj_ptr     = (PetscInt**)(il + am);
1233     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1234     for (i=0; i<am; i++){
1235       jl[i] = am; il[i] = 0;
1236     }
1237 
1238     /* create and initialize a linked list for storing column indices of the active row k */
1239     nlnk = am + 1;
1240     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1241 
1242     /* initial FreeSpace size is fill*(ai[am]+1) */
1243     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1244     current_space = free_space;
1245     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1246     current_space_lvl = free_space_lvl;
1247 
1248     for (k=0; k<am; k++){  /* for each active row k */
1249       /* initialize lnk by the column indices of row rip[k] of A */
1250       nzk   = 0;
1251       ncols = ai[rip[k]+1] - ai[rip[k]];
1252       ncols_upper = 0;
1253       for (j=0; j<ncols; j++){
1254         i = *(aj + ai[rip[k]] + j);
1255         if (rip[i] >= k){ /* only take upper triangular entry */
1256           ajtmp[ncols_upper] = i;
1257           ncols_upper++;
1258         }
1259       }
1260       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1261       nzk += nlnk;
1262 
1263       /* update lnk by computing fill-in for each pivot row to be merged in */
1264       prow = jl[k]; /* 1st pivot row */
1265 
1266       while (prow < k){
1267         nextprow = jl[prow];
1268 
1269         /* merge prow into k-th row */
1270         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1271         jmax = ui[prow+1];
1272         ncols = jmax-jmin;
1273         i     = jmin - ui[prow];
1274         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1275         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1276         j     = *(uj - 1);
1277         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1278         nzk += nlnk;
1279 
1280         /* update il and jl for prow */
1281         if (jmin < jmax){
1282           il[prow] = jmin;
1283           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1284         }
1285         prow = nextprow;
1286       }
1287 
1288       /* if free space is not available, make more free space */
1289       if (current_space->local_remaining<nzk) {
1290         i = am - k + 1; /* num of unfactored rows */
1291         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1292         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1293         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1294         reallocs++;
1295       }
1296 
1297       /* copy data into free_space and free_space_lvl, then initialize lnk */
1298       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1299 
1300       /* add the k-th row into il and jl */
1301       if (nzk > 1){
1302         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1303         jl[k] = jl[i]; jl[i] = k;
1304         il[k] = ui[k] + 1;
1305       }
1306       uj_ptr[k]     = current_space->array;
1307       uj_lvl_ptr[k] = current_space_lvl->array;
1308 
1309       current_space->array           += nzk;
1310       current_space->local_used      += nzk;
1311       current_space->local_remaining -= nzk;
1312 
1313       current_space_lvl->array           += nzk;
1314       current_space_lvl->local_used      += nzk;
1315       current_space_lvl->local_remaining -= nzk;
1316 
1317       ui[k+1] = ui[k] + nzk;
1318     }
1319 
1320     if (ai[am] != 0) {
1321       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1322       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1323       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1324       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1325     } else {
1326       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1327     }
1328 
1329     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1330     ierr = PetscFree(jl);CHKERRQ(ierr);
1331     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1332 
1333     /* destroy list of free space and other temporary array(s) */
1334     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1335     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1336     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1337     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1338 
1339   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1340 
1341   /* put together the new matrix in MATSEQSBAIJ format */
1342   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1343   B = *fact;
1344   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1345   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1346 
1347   b    = (Mat_SeqSBAIJ*)B->data;
1348   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1349   b->singlemalloc = PETSC_FALSE;
1350   /* the next line frees the default space generated by the Create() */
1351   ierr = PetscFree(b->a);CHKERRQ(ierr);
1352   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1353   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1354   b->j    = uj;
1355   b->i    = ui;
1356   b->diag = 0;
1357   b->ilen = 0;
1358   b->imax = 0;
1359   b->row  = perm;
1360   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1361   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1362   b->icol = perm;
1363   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1364   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1365   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1366   b->maxnz = b->nz = ui[am];
1367 
1368   B->factor                 = FACTOR_CHOLESKY;
1369   B->info.factor_mallocs    = reallocs;
1370   B->info.fill_ratio_given  = fill;
1371   if (ai[am] != 0) {
1372     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1373   } else {
1374     B->info.fill_ratio_needed = 0.0;
1375   }
1376   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1377   if (perm_identity){
1378     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1379     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1380   }
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1386 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1387 {
1388   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1389   Mat_SeqSBAIJ   *b;
1390   Mat            B;
1391   PetscErrorCode ierr;
1392   PetscTruth     perm_identity;
1393   PetscReal      fill = info->fill;
1394   PetscInt       *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1395   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1396   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1397   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1398   PetscBT        lnkbt;
1399   IS             iperm;
1400 
1401   PetscFunctionBegin;
1402   /* check whether perm is the identity mapping */
1403   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1404   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1405 
1406   if (!perm_identity){
1407     /* check if perm is symmetric! */
1408     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1409     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1410     for (i=0; i<am; i++) {
1411       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1412     }
1413     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1414     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1415   }
1416 
1417   /* initialization */
1418   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1419   ui[0] = 0;
1420 
1421   /* jl: linked list for storing indices of the pivot rows
1422      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1423   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1424   il     = jl + am;
1425   cols   = il + am;
1426   ui_ptr = (PetscInt**)(cols + am);
1427   for (i=0; i<am; i++){
1428     jl[i] = am; il[i] = 0;
1429   }
1430 
1431   /* create and initialize a linked list for storing column indices of the active row k */
1432   nlnk = am + 1;
1433   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1434 
1435   /* initial FreeSpace size is fill*(ai[am]+1) */
1436   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1437   current_space = free_space;
1438 
1439   for (k=0; k<am; k++){  /* for each active row k */
1440     /* initialize lnk by the column indices of row rip[k] of A */
1441     nzk   = 0;
1442     ncols = ai[rip[k]+1] - ai[rip[k]];
1443     ncols_upper = 0;
1444     for (j=0; j<ncols; j++){
1445       i = rip[*(aj + ai[rip[k]] + j)];
1446       if (i >= k){ /* only take upper triangular entry */
1447         cols[ncols_upper] = i;
1448         ncols_upper++;
1449       }
1450     }
1451     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1452     nzk += nlnk;
1453 
1454     /* update lnk by computing fill-in for each pivot row to be merged in */
1455     prow = jl[k]; /* 1st pivot row */
1456 
1457     while (prow < k){
1458       nextprow = jl[prow];
1459       /* merge prow into k-th row */
1460       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1461       jmax = ui[prow+1];
1462       ncols = jmax-jmin;
1463       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1464       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1465       nzk += nlnk;
1466 
1467       /* update il and jl for prow */
1468       if (jmin < jmax){
1469         il[prow] = jmin;
1470         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1471       }
1472       prow = nextprow;
1473     }
1474 
1475     /* if free space is not available, make more free space */
1476     if (current_space->local_remaining<nzk) {
1477       i = am - k + 1; /* num of unfactored rows */
1478       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1479       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1480       reallocs++;
1481     }
1482 
1483     /* copy data into free space, then initialize lnk */
1484     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1485 
1486     /* add the k-th row into il and jl */
1487     if (nzk-1 > 0){
1488       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1489       jl[k] = jl[i]; jl[i] = k;
1490       il[k] = ui[k] + 1;
1491     }
1492     ui_ptr[k] = current_space->array;
1493     current_space->array           += nzk;
1494     current_space->local_used      += nzk;
1495     current_space->local_remaining -= nzk;
1496 
1497     ui[k+1] = ui[k] + nzk;
1498   }
1499 
1500   if (ai[am] != 0) {
1501     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1502     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1503     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1504     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1505   } else {
1506      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1507   }
1508 
1509   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1510   ierr = PetscFree(jl);CHKERRQ(ierr);
1511 
1512   /* destroy list of free space and other temporary array(s) */
1513   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1514   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1515   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1516 
1517   /* put together the new matrix in MATSEQSBAIJ format */
1518   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1519   B    = *fact;
1520   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1521   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1522 
1523   b = (Mat_SeqSBAIJ*)B->data;
1524   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1525   b->singlemalloc = PETSC_FALSE;
1526   /* the next line frees the default space generated by the Create() */
1527   ierr = PetscFree(b->a);CHKERRQ(ierr);
1528   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1529   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1530   b->j    = uj;
1531   b->i    = ui;
1532   b->diag = 0;
1533   b->ilen = 0;
1534   b->imax = 0;
1535   b->row  = perm;
1536   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1537   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1538   b->icol = perm;
1539   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1540   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1541   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1542   b->maxnz = b->nz = ui[am];
1543 
1544   B->factor                 = FACTOR_CHOLESKY;
1545   B->info.factor_mallocs    = reallocs;
1546   B->info.fill_ratio_given  = fill;
1547   if (ai[am] != 0) {
1548     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1549   } else {
1550     B->info.fill_ratio_needed = 0.0;
1551   }
1552   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1553   if (perm_identity){
1554     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1555     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1556   }
1557   PetscFunctionReturn(0);
1558 }
1559