xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision afcc9904c424d6147a82cc16ffa6c57590ac8da1)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
23 
24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
28 #endif
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
32   /* ------------------------------------------------------------
33 
34           This interface was contribed by Tony Caola
35 
36      This routine is an interface to the pivoting drop-tolerance
37      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
38      SPARSEKIT2.
39 
40      The SPARSEKIT2 routines used here are covered by the GNU
41      copyright; see the file gnu in this directory.
42 
43      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
44      help in getting this routine ironed out.
45 
46      The major drawback to this routine is that if info->fill is
47      not large enough it fails rather than allocating more space;
48      this can be fixed by hacking/improving the f2c version of
49      Yousef Saad's code.
50 
51      ------------------------------------------------------------
52 */
53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
54 {
55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
56   PetscFunctionBegin;
57   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
58   You can obtain the drop tolerance routines by installing PETSc from\n\
59   www.mcs.anl.gov/petsc\n");
60 #else
61   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
62   IS             iscolf,isicol,isirow;
63   PetscTruth     reorder;
64   PetscErrorCode ierr,sierr;
65   PetscInt       *c,*r,*ic,i,n = A->m;
66   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
67   PetscInt       *ordcol,*iwk,*iperm,*jw;
68   PetscInt       jmax,lfill,job,*o_i,*o_j;
69   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
70   PetscReal      af;
71 
72   PetscFunctionBegin;
73 
74   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
75   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
76   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
77   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
78   lfill   = (PetscInt)(info->dtcount/2.0);
79   jmax    = (PetscInt)(info->fill*a->nz);
80 
81 
82   /* ------------------------------------------------------------
83      If reorder=.TRUE., then the original matrix has to be
84      reordered to reflect the user selected ordering scheme, and
85      then de-reordered so it is in it's original format.
86      Because Saad's dperm() is NOT in place, we have to copy
87      the original matrix and allocate more storage. . .
88      ------------------------------------------------------------
89   */
90 
91   /* set reorder to true if either isrow or iscol is not identity */
92   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
93   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
94   reorder = PetscNot(reorder);
95 
96 
97   /* storage for ilu factor */
98   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
100   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
101   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
102 
103   /* ------------------------------------------------------------
104      Make sure that everything is Fortran formatted (1-Based)
105      ------------------------------------------------------------
106   */
107   for (i=old_i[0];i<old_i[n];i++) {
108     old_j[i]++;
109   }
110   for(i=0;i<n+1;i++) {
111     old_i[i]++;
112   };
113 
114 
115   if (reorder) {
116     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
117     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       r[i]  = r[i]+1;
120       c[i]  = c[i]+1;
121     }
122     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
123     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
124     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
125     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
126     for (i=0;i<n;i++) {
127       r[i]  = r[i]-1;
128       c[i]  = c[i]-1;
129     }
130     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
131     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
132     o_a = old_a2;
133     o_j = old_j2;
134     o_i = old_i2;
135   } else {
136     o_a = old_a;
137     o_j = old_j;
138     o_i = old_i;
139   }
140 
141   /* ------------------------------------------------------------
142      Call Saad's ilutp() routine to generate the factorization
143      ------------------------------------------------------------
144   */
145 
146   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
147   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
148   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
149 
150   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
151   if (sierr) {
152     switch (sierr) {
153       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
154       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
155       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
156       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
157       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
158       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
159     }
160   }
161 
162   ierr = PetscFree(w);CHKERRQ(ierr);
163   ierr = PetscFree(jw);CHKERRQ(ierr);
164 
165   /* ------------------------------------------------------------
166      Saad's routine gives the result in Modified Sparse Row (msr)
167      Convert to Compressed Sparse Row format (csr)
168      ------------------------------------------------------------
169   */
170 
171   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
172   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
173 
174   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
175 
176   ierr = PetscFree(iwk);CHKERRQ(ierr);
177   ierr = PetscFree(wk);CHKERRQ(ierr);
178 
179   if (reorder) {
180     ierr = PetscFree(old_a2);CHKERRQ(ierr);
181     ierr = PetscFree(old_j2);CHKERRQ(ierr);
182     ierr = PetscFree(old_i2);CHKERRQ(ierr);
183   } else {
184     /* fix permutation of old_j that the factorization introduced */
185     for (i=old_i[0]; i<old_i[n]; i++) {
186       old_j[i-1] = iperm[old_j[i-1]-1];
187     }
188   }
189 
190   /* get rid of the shift to indices starting at 1 */
191   for (i=0; i<n+1; i++) {
192     old_i[i]--;
193   }
194   for (i=old_i[0];i<old_i[n];i++) {
195     old_j[i]--;
196   }
197 
198   /* Make the factored matrix 0-based */
199   for (i=0; i<n+1; i++) {
200     new_i[i]--;
201   }
202   for (i=new_i[0];i<new_i[n];i++) {
203     new_j[i]--;
204   }
205 
206   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
207   /*-- permute the right-hand-side and solution vectors           --*/
208   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
209   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
210   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
211   for(i=0; i<n; i++) {
212     ordcol[i] = ic[iperm[i]-1];
213   };
214   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
215   ierr = ISDestroy(isicol);CHKERRQ(ierr);
216 
217   ierr = PetscFree(iperm);CHKERRQ(ierr);
218 
219   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
220   ierr = PetscFree(ordcol);CHKERRQ(ierr);
221 
222   /*----- put together the new matrix -----*/
223 
224   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
225   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
226   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
227   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
228   (*fact)->factor    = FACTOR_LU;
229   (*fact)->assembled = PETSC_TRUE;
230 
231   b = (Mat_SeqAIJ*)(*fact)->data;
232   b->freedata      = PETSC_TRUE;
233   b->sorted        = PETSC_FALSE;
234   b->singlemalloc  = PETSC_FALSE;
235   b->a             = new_a;
236   b->j             = new_j;
237   b->i             = new_i;
238   b->ilen          = 0;
239   b->imax          = 0;
240   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241   b->row           = isirow;
242   b->col           = iscolf;
243   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
244   b->maxnz = b->nz = new_i[n];
245   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
246   (*fact)->info.factor_mallocs = 0;
247 
248   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
249 
250   af = ((double)b->nz)/((double)a->nz) + .001;
251   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr);
252   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr);
253   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr);
254   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
255 
256   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
257 
258   PetscFunctionReturn(0);
259 #endif
260 }
261 
262 #undef __FUNCT__
263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
265 {
266   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
267   IS                 isicol;
268   PetscErrorCode     ierr;
269   PetscInt           *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
270   PetscInt           *bi,*bj,*ajtmp;
271   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
272   PetscReal          f;
273   PetscInt           nlnk,*lnk,k,*cols,**bi_ptr;
274   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
275   PetscBT            lnkbt;
276 
277   PetscFunctionBegin;
278   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
279   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
280   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
281   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
282 
283   /* get new row pointers */
284   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
285   bi[0] = 0;
286 
287   /* bdiag is location of diagonal in factor */
288   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
289   bdiag[0] = 0;
290 
291   /* linked list for storing column indices of the active row */
292   nlnk = n + 1;
293   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
294 
295   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr);
296   im     = cols + n;
297   bi_ptr = (PetscInt**)(im + n);
298 
299   /* initial FreeSpace size is f*(ai[n]+1) */
300   f = info->fill;
301   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
302   current_space = free_space;
303 
304   for (i=0; i<n; i++) {
305     /* copy previous fill into linked list */
306     nzi = 0;
307     nnz = ai[r[i]+1] - ai[r[i]];
308     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
309     ajtmp = aj + ai[r[i]];
310     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
311     nzi += nlnk;
312 
313     /* add pivot rows into linked list */
314     row = lnk[n];
315     while (row < i) {
316       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
317       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
318       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
319       nzi += nlnk;
320       row  = lnk[row];
321     }
322     bi[i+1] = bi[i] + nzi;
323     im[i]   = nzi;
324 
325     /* mark bdiag */
326     nzbd = 0;
327     nnz  = nzi;
328     k    = lnk[n];
329     while (nnz-- && k < i){
330       nzbd++;
331       k = lnk[k];
332     }
333     bdiag[i] = bi[i] + nzbd;
334 
335     /* if free space is not available, make more free space */
336     if (current_space->local_remaining<nzi) {
337       nnz = (n - i)*nzi; /* estimated and max additional space needed */
338       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
339       reallocs++;
340     }
341 
342     /* copy data into free space, then initialize lnk */
343     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
344     bi_ptr[i] = current_space->array;
345     current_space->array           += nzi;
346     current_space->local_used      += nzi;
347     current_space->local_remaining -= nzi;
348   }
349 #if defined(PETSC_USE_DEBUG)
350   if (ai[n] != 0) {
351     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
352     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr);
353     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));CHKERRQ(ierr);
354     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr);
355     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
356   } else {
357     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr);
358   }
359 #endif
360 
361   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
362   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
363 
364   /* destroy list of free space and other temporary array(s) */
365   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
366   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
367   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
368   ierr = PetscFree(cols);CHKERRQ(ierr);
369 
370   /* put together the new matrix */
371   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
372   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
373   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
374   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
375   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
376   b    = (Mat_SeqAIJ*)(*B)->data;
377   b->freedata     = PETSC_TRUE;
378   b->singlemalloc = PETSC_FALSE;
379   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
380   b->j          = bj;
381   b->i          = bi;
382   b->diag       = bdiag;
383   b->ilen       = 0;
384   b->imax       = 0;
385   b->row        = isrow;
386   b->col        = iscol;
387   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
388   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
389   b->icol       = isicol;
390   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
391 
392   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
393   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
394   b->maxnz = b->nz = bi[n] ;
395 
396   (*B)->factor                 =  FACTOR_LU;
397   (*B)->info.factor_mallocs    = reallocs;
398   (*B)->info.fill_ratio_given  = f;
399 
400   if (ai[n] != 0) {
401     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
402   } else {
403     (*B)->info.fill_ratio_needed = 0.0;
404   }
405   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
406   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
407   PetscFunctionReturn(0);
408 }
409 
410 /* ----------------------------------------------------------- */
411 #undef __FUNCT__
412 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
413 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
414 {
415   Mat            C=*B;
416   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
417   IS             isrow = b->row,isicol = b->icol;
418   PetscErrorCode ierr;
419   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
420   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
421   PetscInt       *diag_offset = b->diag,diag,*pj;
422   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
423   PetscScalar    d;
424   PetscReal      rs;
425   LUShift_Ctx    sctx;
426   PetscInt       newshift;
427 
428   PetscFunctionBegin;
429   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
430   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
431   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
432   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
433   rtmps = rtmp; ics = ic;
434 
435   if (!a->diag) {
436     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
437   }
438   /* if both shift schemes are chosen by user, only use info->shiftpd */
439   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
440   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
441     PetscInt *aai = a->i,*ddiag = a->diag;
442     sctx.shift_top = 0;
443     for (i=0; i<n; i++) {
444       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
445       d  = (a->a)[ddiag[i]];
446       rs = -PetscAbsScalar(d) - PetscRealPart(d);
447       v  = a->a+aai[i];
448       nz = aai[i+1] - aai[i];
449       for (j=0; j<nz; j++)
450 	rs += PetscAbsScalar(v[j]);
451       if (rs>sctx.shift_top) sctx.shift_top = rs;
452     }
453     if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12;
454     sctx.shift_top    *= 1.1;
455     sctx.nshift_max   = 5;
456     sctx.shift_lo     = 0.;
457     sctx.shift_hi     = 1.;
458   }
459 
460   sctx.shift_amount = 0;
461   sctx.nshift       = 0;
462   do {
463     sctx.lushift = PETSC_FALSE;
464     for (i=0; i<n; i++){
465       nz    = bi[i+1] - bi[i];
466       bjtmp = bj + bi[i];
467       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
468 
469       /* load in initial (unfactored row) */
470       nz    = a->i[r[i]+1] - a->i[r[i]];
471       ajtmp = a->j + a->i[r[i]];
472       v     = a->a + a->i[r[i]];
473       for (j=0; j<nz; j++) {
474         rtmp[ics[ajtmp[j]]] = v[j];
475       }
476       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
477 
478       row = *bjtmp++;
479       while  (row < i) {
480         pc = rtmp + row;
481         if (*pc != 0.0) {
482           pv         = b->a + diag_offset[row];
483           pj         = b->j + diag_offset[row] + 1;
484           multiplier = *pc / *pv++;
485           *pc        = multiplier;
486           nz         = bi[row+1] - diag_offset[row] - 1;
487           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
488           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
489         }
490         row = *bjtmp++;
491       }
492       /* finished row so stick it into b->a */
493       pv   = b->a + bi[i] ;
494       pj   = b->j + bi[i] ;
495       nz   = bi[i+1] - bi[i];
496       diag = diag_offset[i] - bi[i];
497       rs   = 0.0;
498       for (j=0; j<nz; j++) {
499         pv[j] = rtmps[pj[j]];
500         if (j != diag) rs += PetscAbsScalar(pv[j]);
501       }
502 
503       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
504       sctx.rs  = rs;
505       sctx.pv  = pv[diag];
506       ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
507       if (newshift == 1){
508         break;    /* sctx.shift_amount is updated */
509       } else if (newshift == -1){
510         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
511       }
512     }
513 
514     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
515       /*
516        * if no shift in this attempt & shifting & started shifting & can refine,
517        * then try lower shift
518        */
519       sctx.shift_hi        = info->shift_fraction;
520       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
521       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
522       sctx.lushift         = PETSC_TRUE;
523       sctx.nshift++;
524     }
525   } while (sctx.lushift);
526 
527   /* invert diagonal entries for simplier triangular solves */
528   for (i=0; i<n; i++) {
529     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
530   }
531 
532   ierr = PetscFree(rtmp);CHKERRQ(ierr);
533   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
534   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
535   C->factor = FACTOR_LU;
536   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
537   C->assembled = PETSC_TRUE;
538   ierr = PetscLogFlops(C->n);CHKERRQ(ierr);
539   if (sctx.nshift){
540     if (info->shiftnz) {
541       ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
542     } else if (info->shiftpd) {
543       ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr);
544     }
545   }
546   PetscFunctionReturn(0);
547 }
548 
549 #undef __FUNCT__
550 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
551 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
552 {
553   PetscFunctionBegin;
554   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
555   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
556   PetscFunctionReturn(0);
557 }
558 
559 
560 /* ----------------------------------------------------------- */
561 #undef __FUNCT__
562 #define __FUNCT__ "MatLUFactor_SeqAIJ"
563 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
564 {
565   PetscErrorCode ierr;
566   Mat            C;
567 
568   PetscFunctionBegin;
569   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
570   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
571   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
572   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 /* ----------------------------------------------------------- */
576 #undef __FUNCT__
577 #define __FUNCT__ "MatSolve_SeqAIJ"
578 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
579 {
580   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
581   IS             iscol = a->col,isrow = a->row;
582   PetscErrorCode ierr;
583   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
584   PetscInt       nz,*rout,*cout;
585   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
586 
587   PetscFunctionBegin;
588   if (!n) PetscFunctionReturn(0);
589 
590   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
591   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
592   tmp  = a->solve_work;
593 
594   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
595   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
596 
597   /* forward solve the lower triangular */
598   tmp[0] = b[*r++];
599   tmps   = tmp;
600   for (i=1; i<n; i++) {
601     v   = aa + ai[i] ;
602     vi  = aj + ai[i] ;
603     nz  = a->diag[i] - ai[i];
604     sum = b[*r++];
605     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
606     tmp[i] = sum;
607   }
608 
609   /* backward solve the upper triangular */
610   for (i=n-1; i>=0; i--){
611     v   = aa + a->diag[i] + 1;
612     vi  = aj + a->diag[i] + 1;
613     nz  = ai[i+1] - a->diag[i] - 1;
614     sum = tmp[i];
615     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
616     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
617   }
618 
619   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
620   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
621   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
622   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
623   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 
627 /* ----------------------------------------------------------- */
628 #undef __FUNCT__
629 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
630 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
631 {
632   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
633   PetscErrorCode ierr;
634   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
635   PetscScalar    *x,*b,*aa = a->a;
636 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
637   PetscInt       adiag_i,i,*vi,nz,ai_i;
638   PetscScalar    *v,sum;
639 #endif
640 
641   PetscFunctionBegin;
642   if (!n) PetscFunctionReturn(0);
643 
644   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
645   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
646 
647 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
648   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
649 #else
650   /* forward solve the lower triangular */
651   x[0] = b[0];
652   for (i=1; i<n; i++) {
653     ai_i = ai[i];
654     v    = aa + ai_i;
655     vi   = aj + ai_i;
656     nz   = adiag[i] - ai_i;
657     sum  = b[i];
658     while (nz--) sum -= *v++ * x[*vi++];
659     x[i] = sum;
660   }
661 
662   /* backward solve the upper triangular */
663   for (i=n-1; i>=0; i--){
664     adiag_i = adiag[i];
665     v       = aa + adiag_i + 1;
666     vi      = aj + adiag_i + 1;
667     nz      = ai[i+1] - adiag_i - 1;
668     sum     = x[i];
669     while (nz--) sum -= *v++ * x[*vi++];
670     x[i]    = sum*aa[adiag_i];
671   }
672 #endif
673   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
674   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
675   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
681 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
682 {
683   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
684   IS             iscol = a->col,isrow = a->row;
685   PetscErrorCode ierr;
686   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
687   PetscInt       nz,*rout,*cout;
688   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
689 
690   PetscFunctionBegin;
691   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
692 
693   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
694   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
695   tmp  = a->solve_work;
696 
697   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
698   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
699 
700   /* forward solve the lower triangular */
701   tmp[0] = b[*r++];
702   for (i=1; i<n; i++) {
703     v   = aa + ai[i] ;
704     vi  = aj + ai[i] ;
705     nz  = a->diag[i] - ai[i];
706     sum = b[*r++];
707     while (nz--) sum -= *v++ * tmp[*vi++ ];
708     tmp[i] = sum;
709   }
710 
711   /* backward solve the upper triangular */
712   for (i=n-1; i>=0; i--){
713     v   = aa + a->diag[i] + 1;
714     vi  = aj + a->diag[i] + 1;
715     nz  = ai[i+1] - a->diag[i] - 1;
716     sum = tmp[i];
717     while (nz--) sum -= *v++ * tmp[*vi++ ];
718     tmp[i] = sum*aa[a->diag[i]];
719     x[*c--] += tmp[i];
720   }
721 
722   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
723   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
724   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
725   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
726   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
727 
728   PetscFunctionReturn(0);
729 }
730 /* -------------------------------------------------------------------*/
731 #undef __FUNCT__
732 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
733 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
734 {
735   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
736   IS             iscol = a->col,isrow = a->row;
737   PetscErrorCode ierr;
738   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
739   PetscInt       nz,*rout,*cout,*diag = a->diag;
740   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
741 
742   PetscFunctionBegin;
743   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
744   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
745   tmp  = a->solve_work;
746 
747   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
748   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
749 
750   /* copy the b into temp work space according to permutation */
751   for (i=0; i<n; i++) tmp[i] = b[c[i]];
752 
753   /* forward solve the U^T */
754   for (i=0; i<n; i++) {
755     v   = aa + diag[i] ;
756     vi  = aj + diag[i] + 1;
757     nz  = ai[i+1] - diag[i] - 1;
758     s1  = tmp[i];
759     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
760     while (nz--) {
761       tmp[*vi++ ] -= (*v++)*s1;
762     }
763     tmp[i] = s1;
764   }
765 
766   /* backward solve the L^T */
767   for (i=n-1; i>=0; i--){
768     v   = aa + diag[i] - 1 ;
769     vi  = aj + diag[i] - 1 ;
770     nz  = diag[i] - ai[i];
771     s1  = tmp[i];
772     while (nz--) {
773       tmp[*vi-- ] -= (*v--)*s1;
774     }
775   }
776 
777   /* copy tmp into x according to permutation */
778   for (i=0; i<n; i++) x[r[i]] = tmp[i];
779 
780   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
781   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
782   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
783   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
784 
785   ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr);
786   PetscFunctionReturn(0);
787 }
788 
789 #undef __FUNCT__
790 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
791 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
792 {
793   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
794   IS             iscol = a->col,isrow = a->row;
795   PetscErrorCode ierr;
796   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
797   PetscInt       nz,*rout,*cout,*diag = a->diag;
798   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
799 
800   PetscFunctionBegin;
801   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
802 
803   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
804   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
805   tmp = a->solve_work;
806 
807   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
808   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
809 
810   /* copy the b into temp work space according to permutation */
811   for (i=0; i<n; i++) tmp[i] = b[c[i]];
812 
813   /* forward solve the U^T */
814   for (i=0; i<n; i++) {
815     v   = aa + diag[i] ;
816     vi  = aj + diag[i] + 1;
817     nz  = ai[i+1] - diag[i] - 1;
818     tmp[i] *= *v++;
819     while (nz--) {
820       tmp[*vi++ ] -= (*v++)*tmp[i];
821     }
822   }
823 
824   /* backward solve the L^T */
825   for (i=n-1; i>=0; i--){
826     v   = aa + diag[i] - 1 ;
827     vi  = aj + diag[i] - 1 ;
828     nz  = diag[i] - ai[i];
829     while (nz--) {
830       tmp[*vi-- ] -= (*v--)*tmp[i];
831     }
832   }
833 
834   /* copy tmp into x according to permutation */
835   for (i=0; i<n; i++) x[r[i]] += tmp[i];
836 
837   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
838   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
839   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
840   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
841 
842   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
843   PetscFunctionReturn(0);
844 }
845 /* ----------------------------------------------------------------*/
846 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
850 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
851 {
852   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
853   IS                 isicol;
854   PetscErrorCode     ierr;
855   PetscInt           *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
856   PetscInt           *bi,*cols,nnz,*cols_lvl;
857   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
858   PetscInt           i,levels,diagonal_fill;
859   PetscTruth         col_identity,row_identity;
860   PetscReal          f;
861   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
862   PetscBT            lnkbt;
863   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
864   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
865   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
866 
867   PetscFunctionBegin;
868   f             = info->fill;
869   levels        = (PetscInt)info->levels;
870   diagonal_fill = (PetscInt)info->diagonal_fill;
871   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
872 
873   /* special case that simply copies fill pattern */
874   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
875   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
876   if (!levels && row_identity && col_identity) {
877     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
878     (*fact)->factor = FACTOR_LU;
879     b               = (Mat_SeqAIJ*)(*fact)->data;
880     if (!b->diag) {
881       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
882     }
883     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
884     b->row              = isrow;
885     b->col              = iscol;
886     b->icol             = isicol;
887     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
888     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
889     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
890     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
891     PetscFunctionReturn(0);
892   }
893 
894   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
895   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
896 
897   /* get new row pointers */
898   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
899   bi[0] = 0;
900   /* bdiag is location of diagonal in factor */
901   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
902   bdiag[0]  = 0;
903 
904   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
905   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
906 
907   /* create a linked list for storing column indices of the active row */
908   nlnk = n + 1;
909   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
910 
911   /* initial FreeSpace size is f*(ai[n]+1) */
912   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
913   current_space = free_space;
914   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
915   current_space_lvl = free_space_lvl;
916 
917   for (i=0; i<n; i++) {
918     nzi = 0;
919     /* copy current row into linked list */
920     nnz  = ai[r[i]+1] - ai[r[i]];
921     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
922     cols = aj + ai[r[i]];
923     lnk[i] = -1; /* marker to indicate if diagonal exists */
924     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
925     nzi += nlnk;
926 
927     /* make sure diagonal entry is included */
928     if (diagonal_fill && lnk[i] == -1) {
929       fm = n;
930       while (lnk[fm] < i) fm = lnk[fm];
931       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
932       lnk[fm]    = i;
933       lnk_lvl[i] = 0;
934       nzi++; dcount++;
935     }
936 
937     /* add pivot rows into the active row */
938     nzbd = 0;
939     prow = lnk[n];
940     while (prow < i) {
941       nnz      = bdiag[prow];
942       cols     = bj_ptr[prow] + nnz + 1;
943       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
944       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
945       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
946       nzi += nlnk;
947       prow = lnk[prow];
948       nzbd++;
949     }
950     bdiag[i] = nzbd;
951     bi[i+1]  = bi[i] + nzi;
952 
953     /* if free space is not available, make more free space */
954     if (current_space->local_remaining<nzi) {
955       nnz = nzi*(n - i); /* estimated and max additional space needed */
956       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
957       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
958       reallocs++;
959     }
960 
961     /* copy data into free_space and free_space_lvl, then initialize lnk */
962     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
963     bj_ptr[i]    = current_space->array;
964     bjlvl_ptr[i] = current_space_lvl->array;
965 
966     /* make sure the active row i has diagonal entry */
967     if (*(bj_ptr[i]+bdiag[i]) != i) {
968       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
969     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
970     }
971 
972     current_space->array           += nzi;
973     current_space->local_used      += nzi;
974     current_space->local_remaining -= nzi;
975     current_space_lvl->array           += nzi;
976     current_space_lvl->local_used      += nzi;
977     current_space_lvl->local_remaining -= nzi;
978   }
979 
980   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
981   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
982 
983   /* destroy list of free space and other temporary arrays */
984   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
985   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
986   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
987   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
988   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
989 
990 #if defined(PETSC_USE_DEBUG)
991   {
992     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
993     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr);
994     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr);
995     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr);
996     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
997     if (diagonal_fill) {
998       ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr);
999     }
1000   }
1001 #endif
1002 
1003   /* put together the new matrix */
1004   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
1005   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
1006   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1007   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1008   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1009   b = (Mat_SeqAIJ*)(*fact)->data;
1010   b->freedata     = PETSC_TRUE;
1011   b->singlemalloc = PETSC_FALSE;
1012   len = (bi[n] )*sizeof(PetscScalar);
1013   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1014   b->j          = bj;
1015   b->i          = bi;
1016   for (i=0; i<n; i++) bdiag[i] += bi[i];
1017   b->diag       = bdiag;
1018   b->ilen       = 0;
1019   b->imax       = 0;
1020   b->row        = isrow;
1021   b->col        = iscol;
1022   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1023   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1024   b->icol       = isicol;
1025   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1026   /* In b structure:  Free imax, ilen, old a, old j.
1027      Allocate bdiag, solve_work, new a, new j */
1028   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1029   b->maxnz             = b->nz = bi[n] ;
1030   (*fact)->factor = FACTOR_LU;
1031   (*fact)->info.factor_mallocs    = reallocs;
1032   (*fact)->info.fill_ratio_given  = f;
1033   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1034 
1035   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1036   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1037 
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 #include "src/mat/impls/sbaij/seq/sbaij.h"
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1044 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1045 {
1046   Mat            C = *B;
1047   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1048   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1049   IS             ip=b->row;
1050   PetscErrorCode ierr;
1051   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1052   PetscInt       *ai=a->i,*aj=a->j;
1053   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1054   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1055   PetscReal      zeropivot,rs,shiftnz;
1056   PetscReal      shiftpd;
1057   ChShift_Ctx    sctx;
1058   PetscInt       newshift;
1059 
1060   PetscFunctionBegin;
1061   shiftnz   = info->shiftnz;
1062   shiftpd   = info->shiftpd;
1063   zeropivot = info->zeropivot;
1064 
1065   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1066 
1067   /* initialization */
1068   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1069   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1070   jl   = il + mbs;
1071   rtmp = (MatScalar*)(jl + mbs);
1072 
1073   sctx.shift_amount = 0;
1074   sctx.nshift       = 0;
1075   do {
1076     sctx.chshift = PETSC_FALSE;
1077     for (i=0; i<mbs; i++) {
1078       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1079     }
1080 
1081     for (k = 0; k<mbs; k++){
1082       bval = ba + bi[k];
1083       /* initialize k-th row by the perm[k]-th row of A */
1084       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1085       for (j = jmin; j < jmax; j++){
1086         col = rip[aj[j]];
1087         if (col >= k){ /* only take upper triangular entry */
1088           rtmp[col] = aa[j];
1089           *bval++  = 0.0; /* for in-place factorization */
1090         }
1091       }
1092       /* shift the diagonal of the matrix */
1093       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1094 
1095       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1096       dk = rtmp[k];
1097       i = jl[k]; /* first row to be added to k_th row  */
1098 
1099       while (i < k){
1100         nexti = jl[i]; /* next row to be added to k_th row */
1101 
1102         /* compute multiplier, update diag(k) and U(i,k) */
1103         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1104         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1105         dk += uikdi*ba[ili];
1106         ba[ili] = uikdi; /* -U(i,k) */
1107 
1108         /* add multiple of row i to k-th row */
1109         jmin = ili + 1; jmax = bi[i+1];
1110         if (jmin < jmax){
1111           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1112           /* update il and jl for row i */
1113           il[i] = jmin;
1114           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1115         }
1116         i = nexti;
1117       }
1118 
1119       /* shift the diagonals when zero pivot is detected */
1120       /* compute rs=sum of abs(off-diagonal) */
1121       rs   = 0.0;
1122       jmin = bi[k]+1;
1123       nz   = bi[k+1] - jmin;
1124       if (nz){
1125         bcol = bj + jmin;
1126         while (nz--){
1127           rs += PetscAbsScalar(rtmp[*bcol]);
1128           bcol++;
1129         }
1130       }
1131 
1132       sctx.rs = rs;
1133       sctx.pv = dk;
1134       ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
1135       if (newshift == 1){
1136         break;    /* sctx.shift_amount is updated */
1137       } else if (newshift == -1){
1138         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1139       }
1140 
1141       /* copy data into U(k,:) */
1142       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1143       jmin = bi[k]+1; jmax = bi[k+1];
1144       if (jmin < jmax) {
1145         for (j=jmin; j<jmax; j++){
1146           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1147         }
1148         /* add the k-th row into il and jl */
1149         il[k] = jmin;
1150         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1151       }
1152     }
1153   } while (sctx.chshift);
1154   ierr = PetscFree(il);CHKERRQ(ierr);
1155 
1156   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1157   C->factor       = FACTOR_CHOLESKY;
1158   C->assembled    = PETSC_TRUE;
1159   C->preallocated = PETSC_TRUE;
1160   ierr = PetscLogFlops(C->m);CHKERRQ(ierr);
1161   if (sctx.nshift){
1162     if (shiftnz) {
1163       ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
1164     } else if (shiftpd) {
1165       ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
1166     }
1167   }
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 #undef __FUNCT__
1172 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1173 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1174 {
1175   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1176   Mat_SeqSBAIJ       *b;
1177   Mat                B;
1178   PetscErrorCode     ierr;
1179   PetscTruth         perm_identity;
1180   PetscInt           reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1181   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1182   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1183   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1184   PetscReal          fill=info->fill,levels=info->levels;
1185   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1186   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1187   PetscBT            lnkbt;
1188 
1189   PetscFunctionBegin;
1190   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1191   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1192 
1193   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1194   ui[0] = 0;
1195 
1196   /* special case that simply copies fill pattern */
1197   if (!levels && perm_identity) {
1198     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1199     for (i=0; i<am; i++) {
1200       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1201     }
1202     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1203     cols = uj;
1204     for (i=0; i<am; i++) {
1205       aj    = a->j + a->diag[i];
1206       ncols = ui[i+1] - ui[i];
1207       for (j=0; j<ncols; j++) *cols++ = *aj++;
1208     }
1209   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1210     /* initialization */
1211     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1212 
1213     /* jl: linked list for storing indices of the pivot rows
1214        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1215     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1216     il         = jl + am;
1217     uj_ptr     = (PetscInt**)(il + am);
1218     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1219     for (i=0; i<am; i++){
1220       jl[i] = am; il[i] = 0;
1221     }
1222 
1223     /* create and initialize a linked list for storing column indices of the active row k */
1224     nlnk = am + 1;
1225     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1226 
1227     /* initial FreeSpace size is fill*(ai[am]+1) */
1228     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1229     current_space = free_space;
1230     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1231     current_space_lvl = free_space_lvl;
1232 
1233     for (k=0; k<am; k++){  /* for each active row k */
1234       /* initialize lnk by the column indices of row rip[k] of A */
1235       nzk   = 0;
1236       ncols = ai[rip[k]+1] - ai[rip[k]];
1237       ncols_upper = 0;
1238       for (j=0; j<ncols; j++){
1239         i = *(aj + ai[rip[k]] + j);
1240         if (rip[i] >= k){ /* only take upper triangular entry */
1241           ajtmp[ncols_upper] = i;
1242           ncols_upper++;
1243         }
1244       }
1245       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1246       nzk += nlnk;
1247 
1248       /* update lnk by computing fill-in for each pivot row to be merged in */
1249       prow = jl[k]; /* 1st pivot row */
1250 
1251       while (prow < k){
1252         nextprow = jl[prow];
1253 
1254         /* merge prow into k-th row */
1255         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1256         jmax = ui[prow+1];
1257         ncols = jmax-jmin;
1258         i     = jmin - ui[prow];
1259         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1260         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1261         j     = *(uj - 1);
1262         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1263         nzk += nlnk;
1264 
1265         /* update il and jl for prow */
1266         if (jmin < jmax){
1267           il[prow] = jmin;
1268           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1269         }
1270         prow = nextprow;
1271       }
1272 
1273       /* if free space is not available, make more free space */
1274       if (current_space->local_remaining<nzk) {
1275         i = am - k + 1; /* num of unfactored rows */
1276         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1277         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1278         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1279         reallocs++;
1280       }
1281 
1282       /* copy data into free_space and free_space_lvl, then initialize lnk */
1283       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1284 
1285       /* add the k-th row into il and jl */
1286       if (nzk > 1){
1287         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1288         jl[k] = jl[i]; jl[i] = k;
1289         il[k] = ui[k] + 1;
1290       }
1291       uj_ptr[k]     = current_space->array;
1292       uj_lvl_ptr[k] = current_space_lvl->array;
1293 
1294       current_space->array           += nzk;
1295       current_space->local_used      += nzk;
1296       current_space->local_remaining -= nzk;
1297 
1298       current_space_lvl->array           += nzk;
1299       current_space_lvl->local_used      += nzk;
1300       current_space_lvl->local_remaining -= nzk;
1301 
1302       ui[k+1] = ui[k] + nzk;
1303     }
1304 
1305 #if defined(PETSC_USE_DEBUG)
1306     if (ai[am] != 0) {
1307       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1308       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr);
1309       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr);
1310       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr);
1311     } else {
1312       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr);
1313     }
1314 #endif
1315 
1316     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1317     ierr = PetscFree(jl);CHKERRQ(ierr);
1318     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1319 
1320     /* destroy list of free space and other temporary array(s) */
1321     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1322     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1323     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1324     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1325 
1326   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1327 
1328   /* put together the new matrix in MATSEQSBAIJ format */
1329   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1330   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1331   B = *fact;
1332   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1333   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1334 
1335   b    = (Mat_SeqSBAIJ*)B->data;
1336   b->singlemalloc = PETSC_FALSE;
1337   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1338   b->j    = uj;
1339   b->i    = ui;
1340   b->diag = 0;
1341   b->ilen = 0;
1342   b->imax = 0;
1343   b->row  = perm;
1344   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1345   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1346   b->icol = perm;
1347   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1348   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1349   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1350   b->maxnz = b->nz = ui[am];
1351 
1352   B->factor                 = FACTOR_CHOLESKY;
1353   B->info.factor_mallocs    = reallocs;
1354   B->info.fill_ratio_given  = fill;
1355   if (ai[am] != 0) {
1356     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1357   } else {
1358     B->info.fill_ratio_needed = 0.0;
1359   }
1360   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1361   if (perm_identity){
1362     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1363     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1370 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1371 {
1372   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1373   Mat_SeqSBAIJ       *b;
1374   Mat                B;
1375   PetscErrorCode     ierr;
1376   PetscTruth         perm_identity;
1377   PetscReal          fill = info->fill;
1378   PetscInt           *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1379   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1380   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1381   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1382   PetscBT            lnkbt;
1383   IS                 iperm;
1384 
1385   PetscFunctionBegin;
1386   /* check whether perm is the identity mapping */
1387   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1388   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1389 
1390   if (!perm_identity){
1391     /* check if perm is symmetric! */
1392     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1393     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1394     for (i=0; i<am; i++) {
1395       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1396     }
1397     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1398     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1399   }
1400 
1401   /* initialization */
1402   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1403   ui[0] = 0;
1404 
1405   /* jl: linked list for storing indices of the pivot rows
1406      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1407   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1408   il     = jl + am;
1409   cols   = il + am;
1410   ui_ptr = (PetscInt**)(cols + am);
1411   for (i=0; i<am; i++){
1412     jl[i] = am; il[i] = 0;
1413   }
1414 
1415   /* create and initialize a linked list for storing column indices of the active row k */
1416   nlnk = am + 1;
1417   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1418 
1419   /* initial FreeSpace size is fill*(ai[am]+1) */
1420   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1421   current_space = free_space;
1422 
1423   for (k=0; k<am; k++){  /* for each active row k */
1424     /* initialize lnk by the column indices of row rip[k] of A */
1425     nzk   = 0;
1426     ncols = ai[rip[k]+1] - ai[rip[k]];
1427     ncols_upper = 0;
1428     for (j=0; j<ncols; j++){
1429       i = rip[*(aj + ai[rip[k]] + j)];
1430       if (i >= k){ /* only take upper triangular entry */
1431         cols[ncols_upper] = i;
1432         ncols_upper++;
1433       }
1434     }
1435     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1436     nzk += nlnk;
1437 
1438     /* update lnk by computing fill-in for each pivot row to be merged in */
1439     prow = jl[k]; /* 1st pivot row */
1440 
1441     while (prow < k){
1442       nextprow = jl[prow];
1443       /* merge prow into k-th row */
1444       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1445       jmax = ui[prow+1];
1446       ncols = jmax-jmin;
1447       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1448       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1449       nzk += nlnk;
1450 
1451       /* update il and jl for prow */
1452       if (jmin < jmax){
1453         il[prow] = jmin;
1454         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1455       }
1456       prow = nextprow;
1457     }
1458 
1459     /* if free space is not available, make more free space */
1460     if (current_space->local_remaining<nzk) {
1461       i = am - k + 1; /* num of unfactored rows */
1462       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1463       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1464       reallocs++;
1465     }
1466 
1467     /* copy data into free space, then initialize lnk */
1468     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1469 
1470     /* add the k-th row into il and jl */
1471     if (nzk-1 > 0){
1472       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1473       jl[k] = jl[i]; jl[i] = k;
1474       il[k] = ui[k] + 1;
1475     }
1476     ui_ptr[k] = current_space->array;
1477     current_space->array           += nzk;
1478     current_space->local_used      += nzk;
1479     current_space->local_remaining -= nzk;
1480 
1481     ui[k+1] = ui[k] + nzk;
1482   }
1483 
1484 #if defined(PETSC_USE_DEBUG)
1485   if (ai[am] != 0) {
1486     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1487     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr);
1488     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr);
1489     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr);
1490   } else {
1491      ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr);
1492   }
1493 #endif
1494 
1495   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1496   ierr = PetscFree(jl);CHKERRQ(ierr);
1497 
1498   /* destroy list of free space and other temporary array(s) */
1499   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1500   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1501   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1502 
1503   /* put together the new matrix in MATSEQSBAIJ format */
1504   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1505   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1506   B    = *fact;
1507   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1508   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1509 
1510   b = (Mat_SeqSBAIJ*)B->data;
1511   b->singlemalloc = PETSC_FALSE;
1512   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1513   b->j    = uj;
1514   b->i    = ui;
1515   b->diag = 0;
1516   b->ilen = 0;
1517   b->imax = 0;
1518   b->row  = perm;
1519   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1520   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1521   b->icol = perm;
1522   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1523   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1524   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1525   b->maxnz = b->nz = ui[am];
1526 
1527   B->factor                 = FACTOR_CHOLESKY;
1528   B->info.factor_mallocs    = reallocs;
1529   B->info.fill_ratio_given  = fill;
1530   if (ai[am] != 0) {
1531     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1532   } else {
1533     B->info.fill_ratio_needed = 0.0;
1534   }
1535   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1536   if (perm_identity){
1537     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1538     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1539   }
1540   PetscFunctionReturn(0);
1541 }
1542