xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 39d35d2dd2fd359ed0df7a061277ef1709a6de28)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 #include "petscbt.h"
6 #include "src/mat/utils/freespace.h"
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
11 {
12   PetscFunctionBegin;
13 
14   SETERRQ(PETSC_ERR_SUP,"Code not written");
15 #if !defined(PETSC_USE_DEBUG)
16   PetscFunctionReturn(0);
17 #endif
18 }
19 
20 
21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
22 
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
26 
27 #undef __FUNCT__
28 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
29   /* ------------------------------------------------------------
30 
31           This interface was contribed by Tony Caola
32 
33      This routine is an interface to the pivoting drop-tolerance
34      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
35      SPARSEKIT2.
36 
37      The SPARSEKIT2 routines used here are covered by the GNU
38      copyright; see the file gnu in this directory.
39 
40      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
41      help in getting this routine ironed out.
42 
43      The major drawback to this routine is that if info->fill is
44      not large enough it fails rather than allocating more space;
45      this can be fixed by hacking/improving the f2c version of
46      Yousef Saad's code.
47 
48      ------------------------------------------------------------
49 */
50 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
51 {
52 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
53   PetscFunctionBegin;
54   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
55   You can obtain the drop tolerance routines by installing PETSc from\n\
56   www.mcs.anl.gov/petsc\n");
57 #else
58   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
59   IS             iscolf,isicol,isirow;
60   PetscTruth     reorder;
61   PetscErrorCode ierr,sierr;
62   PetscInt       *c,*r,*ic,i,n = A->m;
63   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
64   PetscInt       *ordcol,*iwk,*iperm,*jw;
65   PetscInt       jmax,lfill,job,*o_i,*o_j;
66   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
67   PetscReal      af;
68 
69   PetscFunctionBegin;
70 
71   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
72   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
73   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
74   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
75   lfill   = (PetscInt)(info->dtcount/2.0);
76   jmax    = (PetscInt)(info->fill*a->nz);
77 
78 
79   /* ------------------------------------------------------------
80      If reorder=.TRUE., then the original matrix has to be
81      reordered to reflect the user selected ordering scheme, and
82      then de-reordered so it is in it's original format.
83      Because Saad's dperm() is NOT in place, we have to copy
84      the original matrix and allocate more storage. . .
85      ------------------------------------------------------------
86   */
87 
88   /* set reorder to true if either isrow or iscol is not identity */
89   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
90   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
91   reorder = PetscNot(reorder);
92 
93 
94   /* storage for ilu factor */
95   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
97   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
98   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
99 
100   /* ------------------------------------------------------------
101      Make sure that everything is Fortran formatted (1-Based)
102      ------------------------------------------------------------
103   */
104   for (i=old_i[0];i<old_i[n];i++) {
105     old_j[i]++;
106   }
107   for(i=0;i<n+1;i++) {
108     old_i[i]++;
109   };
110 
111 
112   if (reorder) {
113     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
114     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
115     for(i=0;i<n;i++) {
116       r[i]  = r[i]+1;
117       c[i]  = c[i]+1;
118     }
119     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
121     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
122     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
123     for (i=0;i<n;i++) {
124       r[i]  = r[i]-1;
125       c[i]  = c[i]-1;
126     }
127     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
128     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
129     o_a = old_a2;
130     o_j = old_j2;
131     o_i = old_i2;
132   } else {
133     o_a = old_a;
134     o_j = old_j;
135     o_i = old_i;
136   }
137 
138   /* ------------------------------------------------------------
139      Call Saad's ilutp() routine to generate the factorization
140      ------------------------------------------------------------
141   */
142 
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
145   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
146 
147   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148   if (sierr) {
149     switch (sierr) {
150       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
152       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
153       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
154       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
155       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
156     }
157   }
158 
159   ierr = PetscFree(w);CHKERRQ(ierr);
160   ierr = PetscFree(jw);CHKERRQ(ierr);
161 
162   /* ------------------------------------------------------------
163      Saad's routine gives the result in Modified Sparse Row (msr)
164      Convert to Compressed Sparse Row format (csr)
165      ------------------------------------------------------------
166   */
167 
168   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
169   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
170 
171   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
172 
173   ierr = PetscFree(iwk);CHKERRQ(ierr);
174   ierr = PetscFree(wk);CHKERRQ(ierr);
175 
176   if (reorder) {
177     ierr = PetscFree(old_a2);CHKERRQ(ierr);
178     ierr = PetscFree(old_j2);CHKERRQ(ierr);
179     ierr = PetscFree(old_i2);CHKERRQ(ierr);
180   } else {
181     /* fix permutation of old_j that the factorization introduced */
182     for (i=old_i[0]; i<old_i[n]; i++) {
183       old_j[i-1] = iperm[old_j[i-1]-1];
184     }
185   }
186 
187   /* get rid of the shift to indices starting at 1 */
188   for (i=0; i<n+1; i++) {
189     old_i[i]--;
190   }
191   for (i=old_i[0];i<old_i[n];i++) {
192     old_j[i]--;
193   }
194 
195   /* Make the factored matrix 0-based */
196   for (i=0; i<n+1; i++) {
197     new_i[i]--;
198   }
199   for (i=new_i[0];i<new_i[n];i++) {
200     new_j[i]--;
201   }
202 
203   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204   /*-- permute the right-hand-side and solution vectors           --*/
205   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
206   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
207   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
208   for(i=0; i<n; i++) {
209     ordcol[i] = ic[iperm[i]-1];
210   };
211   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
212   ierr = ISDestroy(isicol);CHKERRQ(ierr);
213 
214   ierr = PetscFree(iperm);CHKERRQ(ierr);
215 
216   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
217   ierr = PetscFree(ordcol);CHKERRQ(ierr);
218 
219   /*----- put together the new matrix -----*/
220 
221   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
222   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
223   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
224   (*fact)->factor    = FACTOR_LU;
225   (*fact)->assembled = PETSC_TRUE;
226 
227   b = (Mat_SeqAIJ*)(*fact)->data;
228   ierr = PetscFree(b->imax);CHKERRQ(ierr);
229   b->sorted        = PETSC_FALSE;
230   b->singlemalloc  = PETSC_FALSE;
231   /* the next line frees the default space generated by the MatCreate() */
232   ierr             = PetscFree(b->a);CHKERRQ(ierr);
233   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
234   b->a             = new_a;
235   b->j             = new_j;
236   b->i             = new_i;
237   b->ilen          = 0;
238   b->imax          = 0;
239   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
240   b->row           = isirow;
241   b->col           = iscolf;
242   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
243   b->maxnz = b->nz = new_i[n];
244   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
245   (*fact)->info.factor_mallocs = 0;
246 
247   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
248 
249   af = ((double)b->nz)/((double)a->nz) + .001;
250   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
251   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
254 
255   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 #undef __FUNCT__
262 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
263 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
264 {
265   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
266   IS             isicol;
267   PetscErrorCode ierr;
268   PetscInt       *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
269   PetscInt       *bi,*bj,*ajtmp;
270   PetscInt       *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
271   PetscReal      f;
272   PetscInt       nlnk,*lnk,k,*cols,**bi_ptr;
273   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
274   PetscBT        lnkbt;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
284   bi[0] = 0;
285 
286   /* bdiag is location of diagonal in factor */
287   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
288   bdiag[0] = 0;
289 
290   /* linked list for storing column indices of the active row */
291   nlnk = n + 1;
292   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
293 
294   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr);
295   im     = cols + n;
296   bi_ptr = (PetscInt**)(im + n);
297 
298   /* initial FreeSpace size is f*(ai[n]+1) */
299   f = info->fill;
300   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
301   current_space = free_space;
302 
303   for (i=0; i<n; i++) {
304     /* copy previous fill into linked list */
305     nzi = 0;
306     nnz = ai[r[i]+1] - ai[r[i]];
307     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
308     ajtmp = aj + ai[r[i]];
309     for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */
310     ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
311     nzi += nlnk;
312 
313     /* add pivot rows into linked list */
314     row = lnk[n];
315     while (row < i) {
316       nzbd    = bdiag[row] - bi[row] + 1;
317       ajtmp   = bi_ptr[row] + nzbd;
318       nnz     = im[row] - nzbd; /* num of columns with row<indices<=i */
319       im[row] = nzbd;
320       ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr);
321       nzi     += nlnk;
322       im[row] += nzbd;  /* update im[row]: num of cols with index<=i */
323 
324       row = lnk[row];
325     }
326 
327     bi[i+1] = bi[i] + nzi;
328     im[i]   = nzi;
329 
330     /* mark bdiag */
331     nzbd = 0;
332     nnz  = nzi;
333     k    = lnk[n];
334     while (nnz-- && k < i){
335       nzbd++;
336       k = lnk[k];
337     }
338     bdiag[i] = bi[i] + nzbd;
339 
340     /* if free space is not available, make more free space */
341     if (current_space->local_remaining<nzi) {
342       nnz = (n - i)*nzi; /* estimated and max additional space needed */
343       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
344       reallocs++;
345     }
346 
347     /* copy data into free space, then initialize lnk */
348     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
349     bi_ptr[i] = current_space->array;
350     current_space->array           += nzi;
351     current_space->local_used      += nzi;
352     current_space->local_remaining -= nzi;
353   }
354   if (ai[n] != 0) {
355     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
356     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
357     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
358     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
359     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
360   } else {
361     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
362   }
363 
364   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
365   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
366 
367   /* destroy list of free space and other temporary array(s) */
368   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
369   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
370   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
371   ierr = PetscFree(cols);CHKERRQ(ierr);
372 
373   /* put together the new matrix */
374   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
375   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
376   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
377   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
378   b    = (Mat_SeqAIJ*)(*B)->data;
379   ierr = PetscFree(b->imax);CHKERRQ(ierr);
380   b->singlemalloc = PETSC_FALSE;
381   /* the next line frees the default space generated by the Create() */
382   ierr = PetscFree(b->a);CHKERRQ(ierr);
383   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
384   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
385   b->j          = bj;
386   b->i          = bi;
387   b->diag       = bdiag;
388   b->ilen       = 0;
389   b->imax       = 0;
390   b->row        = isrow;
391   b->col        = iscol;
392   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
393   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
394   b->icol       = isicol;
395   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
396 
397   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
398   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
399   b->maxnz = b->nz = bi[n] ;
400 
401   (*B)->factor                 =  FACTOR_LU;
402   (*B)->info.factor_mallocs    = reallocs;
403   (*B)->info.fill_ratio_given  = f;
404 
405   if (ai[n] != 0) {
406     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
407   } else {
408     (*B)->info.fill_ratio_needed = 0.0;
409   }
410   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
411   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
412   PetscFunctionReturn(0);
413 }
414 
415 /* ----------------------------------------------------------- */
416 #undef __FUNCT__
417 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
418 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
419 {
420   Mat            C=*B;
421   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
422   IS             isrow = b->row,isicol = b->icol;
423   PetscErrorCode ierr;
424   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
425   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
426   PetscInt       *diag_offset = b->diag,diag,*pj;
427   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
428   PetscReal      rs,d;
429   PetscReal      row_shift;
430   LUShift_Ctx    sctx;
431   PetscInt       newshift;
432 
433   PetscFunctionBegin;
434   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
435   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
436   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
437   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
438   rtmps = rtmp; ics = ic;
439 
440   if (!a->diag) {
441     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
442   }
443   /* if both shift schemes are chosen by user, only use info->shiftpd */
444   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
445   if (info->shiftpd) { /* set sctx.shift_top=max{row_shift} */
446     PetscInt *aai = a->i,*ddiag = a->diag;
447     sctx.shift_top = 0;
448     for (i=0; i<n; i++) {
449       d = PetscRealPart((a->a)[ddiag[i]]);
450       /* calculate amt of shift needed for this row */
451       if (d<=0) {
452         row_shift = 0;
453       } else {
454         row_shift = -2*PetscAbsScalar((a->a)[ddiag[i]]);
455       }
456       v  = a->a+aai[i];
457       nz = aai[i+1] - aai[i];
458       for (j=0; j<nz; j++)
459 	row_shift += PetscAbsScalar(v[j]);
460       if (row_shift>sctx.shift_top) sctx.shift_top = row_shift;
461     }
462     if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12;
463     sctx.shift_top    *= 1.1;
464     sctx.nshift_max   = 5;
465     sctx.shift_lo     = 0.;
466     sctx.shift_hi     = 1.;
467   }
468 
469   sctx.shift_amount = 0;
470   sctx.nshift       = 0;
471   do {
472     sctx.lushift = PETSC_FALSE;
473     for (i=0; i<n; i++){
474       nz    = bi[i+1] - bi[i];
475       bjtmp = bj + bi[i];
476       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
477 
478       /* load in initial (unfactored row) */
479       nz    = a->i[r[i]+1] - a->i[r[i]];
480       ajtmp = a->j + a->i[r[i]];
481       v     = a->a + a->i[r[i]];
482       for (j=0; j<nz; j++) {
483         rtmp[ics[ajtmp[j]]] = v[j];
484       }
485       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
486 
487       row = *bjtmp++;
488       while  (row < i) {
489         pc = rtmp + row;
490         if (*pc != 0.0) {
491           pv         = b->a + diag_offset[row];
492           pj         = b->j + diag_offset[row] + 1;
493           multiplier = *pc / *pv++;
494           *pc        = multiplier;
495           nz         = bi[row+1] - diag_offset[row] - 1;
496           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
497           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
498         }
499         row = *bjtmp++;
500       }
501       /* finished row so stick it into b->a */
502       pv   = b->a + bi[i] ;
503       pj   = b->j + bi[i] ;
504       nz   = bi[i+1] - bi[i];
505       diag = diag_offset[i] - bi[i];
506       rs   = 0.0;
507       for (j=0; j<nz; j++) {
508         pv[j] = rtmps[pj[j]];
509         if (j != diag) rs += PetscAbsScalar(pv[j]);
510       }
511 
512       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
513       sctx.rs  = rs;
514       sctx.pv  = pv[diag];
515       ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
516       if (newshift == 1){
517         break;    /* sctx.shift_amount is updated */
518       } else if (newshift == -1){
519         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
520       }
521     }
522 
523     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
524       /*
525        * if no shift in this attempt & shifting & started shifting & can refine,
526        * then try lower shift
527        */
528       sctx.shift_hi        = info->shift_fraction;
529       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
530       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
531       sctx.lushift         = PETSC_TRUE;
532       sctx.nshift++;
533     }
534   } while (sctx.lushift);
535 
536   /* invert diagonal entries for simplier triangular solves */
537   for (i=0; i<n; i++) {
538     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
539   }
540 
541   ierr = PetscFree(rtmp);CHKERRQ(ierr);
542   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
543   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
544   C->factor = FACTOR_LU;
545   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
546   C->assembled = PETSC_TRUE;
547   ierr = PetscLogFlops(C->n);CHKERRQ(ierr);
548   if (sctx.nshift){
549     if (info->shiftnz) {
550       PetscLogInfo(0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
551     } else if (info->shiftpd) {
552       b->lu_shift_fraction = info->shift_fraction;
553       PetscLogInfo(0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);
554     }
555   }
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
561 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
562 {
563   PetscFunctionBegin;
564   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
565   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
566   PetscFunctionReturn(0);
567 }
568 
569 
570 /* ----------------------------------------------------------- */
571 #undef __FUNCT__
572 #define __FUNCT__ "MatLUFactor_SeqAIJ"
573 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
574 {
575   PetscErrorCode ierr;
576   Mat            C;
577 
578   PetscFunctionBegin;
579   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
580   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
581   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
582   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
583   PetscFunctionReturn(0);
584 }
585 /* ----------------------------------------------------------- */
586 #undef __FUNCT__
587 #define __FUNCT__ "MatSolve_SeqAIJ"
588 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
589 {
590   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
591   IS             iscol = a->col,isrow = a->row;
592   PetscErrorCode ierr;
593   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
594   PetscInt       nz,*rout,*cout;
595   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
596 
597   PetscFunctionBegin;
598   if (!n) PetscFunctionReturn(0);
599 
600   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
601   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
602   tmp  = a->solve_work;
603 
604   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
605   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
606 
607   /* forward solve the lower triangular */
608   tmp[0] = b[*r++];
609   tmps   = tmp;
610   for (i=1; i<n; i++) {
611     v   = aa + ai[i] ;
612     vi  = aj + ai[i] ;
613     nz  = a->diag[i] - ai[i];
614     sum = b[*r++];
615     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
616     tmp[i] = sum;
617   }
618 
619   /* backward solve the upper triangular */
620   for (i=n-1; i>=0; i--){
621     v   = aa + a->diag[i] + 1;
622     vi  = aj + a->diag[i] + 1;
623     nz  = ai[i+1] - a->diag[i] - 1;
624     sum = tmp[i];
625     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
626     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
627   }
628 
629   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
630   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
631   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
632   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
633   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
634   PetscFunctionReturn(0);
635 }
636 
637 /* ----------------------------------------------------------- */
638 #undef __FUNCT__
639 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
640 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
641 {
642   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
643   PetscErrorCode ierr;
644   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
645   PetscScalar    *x,*b,*aa = a->a;
646 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
647   PetscInt       adiag_i,i,*vi,nz,ai_i;
648   PetscScalar    *v,sum;
649 #endif
650 
651   PetscFunctionBegin;
652   if (!n) PetscFunctionReturn(0);
653 
654   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
655   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
656 
657 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
658   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
659 #else
660   /* forward solve the lower triangular */
661   x[0] = b[0];
662   for (i=1; i<n; i++) {
663     ai_i = ai[i];
664     v    = aa + ai_i;
665     vi   = aj + ai_i;
666     nz   = adiag[i] - ai_i;
667     sum  = b[i];
668     while (nz--) sum -= *v++ * x[*vi++];
669     x[i] = sum;
670   }
671 
672   /* backward solve the upper triangular */
673   for (i=n-1; i>=0; i--){
674     adiag_i = adiag[i];
675     v       = aa + adiag_i + 1;
676     vi      = aj + adiag_i + 1;
677     nz      = ai[i+1] - adiag_i - 1;
678     sum     = x[i];
679     while (nz--) sum -= *v++ * x[*vi++];
680     x[i]    = sum*aa[adiag_i];
681   }
682 #endif
683   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
684   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
685   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
686   PetscFunctionReturn(0);
687 }
688 
689 #undef __FUNCT__
690 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
691 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
692 {
693   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
694   IS             iscol = a->col,isrow = a->row;
695   PetscErrorCode ierr;
696   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
697   PetscInt       nz,*rout,*cout;
698   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
699 
700   PetscFunctionBegin;
701   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
702 
703   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
704   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
705   tmp  = a->solve_work;
706 
707   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
708   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
709 
710   /* forward solve the lower triangular */
711   tmp[0] = b[*r++];
712   for (i=1; i<n; i++) {
713     v   = aa + ai[i] ;
714     vi  = aj + ai[i] ;
715     nz  = a->diag[i] - ai[i];
716     sum = b[*r++];
717     while (nz--) sum -= *v++ * tmp[*vi++ ];
718     tmp[i] = sum;
719   }
720 
721   /* backward solve the upper triangular */
722   for (i=n-1; i>=0; i--){
723     v   = aa + a->diag[i] + 1;
724     vi  = aj + a->diag[i] + 1;
725     nz  = ai[i+1] - a->diag[i] - 1;
726     sum = tmp[i];
727     while (nz--) sum -= *v++ * tmp[*vi++ ];
728     tmp[i] = sum*aa[a->diag[i]];
729     x[*c--] += tmp[i];
730   }
731 
732   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
733   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
734   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
735   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
736   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
737 
738   PetscFunctionReturn(0);
739 }
740 /* -------------------------------------------------------------------*/
741 #undef __FUNCT__
742 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
743 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
744 {
745   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
746   IS             iscol = a->col,isrow = a->row;
747   PetscErrorCode ierr;
748   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
749   PetscInt       nz,*rout,*cout,*diag = a->diag;
750   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
751 
752   PetscFunctionBegin;
753   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
754   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
755   tmp  = a->solve_work;
756 
757   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
758   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
759 
760   /* copy the b into temp work space according to permutation */
761   for (i=0; i<n; i++) tmp[i] = b[c[i]];
762 
763   /* forward solve the U^T */
764   for (i=0; i<n; i++) {
765     v   = aa + diag[i] ;
766     vi  = aj + diag[i] + 1;
767     nz  = ai[i+1] - diag[i] - 1;
768     s1  = tmp[i];
769     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
770     while (nz--) {
771       tmp[*vi++ ] -= (*v++)*s1;
772     }
773     tmp[i] = s1;
774   }
775 
776   /* backward solve the L^T */
777   for (i=n-1; i>=0; i--){
778     v   = aa + diag[i] - 1 ;
779     vi  = aj + diag[i] - 1 ;
780     nz  = diag[i] - ai[i];
781     s1  = tmp[i];
782     while (nz--) {
783       tmp[*vi-- ] -= (*v--)*s1;
784     }
785   }
786 
787   /* copy tmp into x according to permutation */
788   for (i=0; i<n; i++) x[r[i]] = tmp[i];
789 
790   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
791   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
792   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
793   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
794 
795   ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr);
796   PetscFunctionReturn(0);
797 }
798 
799 #undef __FUNCT__
800 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
801 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
802 {
803   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
804   IS             iscol = a->col,isrow = a->row;
805   PetscErrorCode ierr;
806   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
807   PetscInt       nz,*rout,*cout,*diag = a->diag;
808   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
809 
810   PetscFunctionBegin;
811   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
812 
813   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
814   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
815   tmp = a->solve_work;
816 
817   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
818   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
819 
820   /* copy the b into temp work space according to permutation */
821   for (i=0; i<n; i++) tmp[i] = b[c[i]];
822 
823   /* forward solve the U^T */
824   for (i=0; i<n; i++) {
825     v   = aa + diag[i] ;
826     vi  = aj + diag[i] + 1;
827     nz  = ai[i+1] - diag[i] - 1;
828     tmp[i] *= *v++;
829     while (nz--) {
830       tmp[*vi++ ] -= (*v++)*tmp[i];
831     }
832   }
833 
834   /* backward solve the L^T */
835   for (i=n-1; i>=0; i--){
836     v   = aa + diag[i] - 1 ;
837     vi  = aj + diag[i] - 1 ;
838     nz  = diag[i] - ai[i];
839     while (nz--) {
840       tmp[*vi-- ] -= (*v--)*tmp[i];
841     }
842   }
843 
844   /* copy tmp into x according to permutation */
845   for (i=0; i<n; i++) x[r[i]] += tmp[i];
846 
847   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
848   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
849   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
850   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
851 
852   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
853   PetscFunctionReturn(0);
854 }
855 /* ----------------------------------------------------------------*/
856 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
857 
858 #undef __FUNCT__
859 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
860 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
861 {
862   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
863   IS             isicol;
864   PetscErrorCode ierr;
865   PetscInt       *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
866   PetscInt       *bi,*cols,nnz,*cols_lvl;
867   PetscInt       *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
868   PetscInt       i,levels,diagonal_fill;
869   PetscTruth     col_identity,row_identity;
870   PetscReal      f;
871   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
872   PetscBT        lnkbt;
873   PetscInt       nzi,*bj,**bj_ptr,**bjlvl_ptr;
874   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
875   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
876 
877   PetscFunctionBegin;
878   f             = info->fill;
879   levels        = (PetscInt)info->levels;
880   diagonal_fill = (PetscInt)info->diagonal_fill;
881   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
882 
883   /* special case that simply copies fill pattern */
884   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
885   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
886   if (!levels && row_identity && col_identity) {
887     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
888     (*fact)->factor = FACTOR_LU;
889     b               = (Mat_SeqAIJ*)(*fact)->data;
890     if (!b->diag) {
891       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
892     }
893     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
894     b->row              = isrow;
895     b->col              = iscol;
896     b->icol             = isicol;
897     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
898     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
899     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
900     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
901     PetscFunctionReturn(0);
902   }
903 
904   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
905   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
906 
907   /* get new row pointers */
908   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
909   bi[0] = 0;
910   /* bdiag is location of diagonal in factor */
911   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
912   bdiag[0]  = 0;
913 
914   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
915   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
916 
917   /* create a linked list for storing column indices of the active row */
918   nlnk = n + 1;
919   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
920 
921   /* initial FreeSpace size is f*(ai[n]+1) */
922   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
923   current_space = free_space;
924   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
925   current_space_lvl = free_space_lvl;
926 
927   for (i=0; i<n; i++) {
928     nzi = 0;
929     /* copy current row into linked list */
930     nnz  = ai[r[i]+1] - ai[r[i]];
931     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
932     cols = aj + ai[r[i]];
933     lnk[i] = -1; /* marker to indicate if diagonal exists */
934     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
935     nzi += nlnk;
936 
937     /* make sure diagonal entry is included */
938     if (diagonal_fill && lnk[i] == -1) {
939       fm = n;
940       while (lnk[fm] < i) fm = lnk[fm];
941       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
942       lnk[fm]    = i;
943       lnk_lvl[i] = 0;
944       nzi++; dcount++;
945     }
946 
947     /* add pivot rows into the active row */
948     nzbd = 0;
949     prow = lnk[n];
950     while (prow < i) {
951       nnz      = bdiag[prow];
952       cols     = bj_ptr[prow] + nnz + 1;
953       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
954       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
955       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
956       nzi += nlnk;
957       prow = lnk[prow];
958       nzbd++;
959     }
960     bdiag[i] = nzbd;
961     bi[i+1]  = bi[i] + nzi;
962 
963     /* if free space is not available, make more free space */
964     if (current_space->local_remaining<nzi) {
965       nnz = nzi*(n - i); /* estimated and max additional space needed */
966       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
967       ierr = GetMoreSpace(nnz,&current_space_lvl);CHKERRQ(ierr);
968       reallocs++;
969     }
970 
971     /* copy data into free_space and free_space_lvl, then initialize lnk */
972     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
973     bj_ptr[i]    = current_space->array;
974     bjlvl_ptr[i] = current_space_lvl->array;
975 
976     /* make sure the active row i has diagonal entry */
977     if (*(bj_ptr[i]+bdiag[i]) != i) {
978       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
979     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
980     }
981 
982     current_space->array           += nzi;
983     current_space->local_used      += nzi;
984     current_space->local_remaining -= nzi;
985     current_space_lvl->array           += nzi;
986     current_space_lvl->local_used      += nzi;
987     current_space_lvl->local_remaining -= nzi;
988   }
989 
990   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
991   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
992 
993   /* destroy list of free space and other temporary arrays */
994   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
995   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
996   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
997   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
998   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
999 
1000   {
1001     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1002     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1003     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1004     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1005     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1006     if (diagonal_fill) {
1007       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1008     }
1009   }
1010 
1011   /* put together the new matrix */
1012   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1013   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1014   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1015   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1016   b = (Mat_SeqAIJ*)(*fact)->data;
1017   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1018   b->singlemalloc = PETSC_FALSE;
1019   len = (bi[n] )*sizeof(PetscScalar);
1020   /* the next line frees the default space generated by the Create() */
1021   ierr = PetscFree(b->a);CHKERRQ(ierr);
1022   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1023   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1024   b->j          = bj;
1025   b->i          = bi;
1026   for (i=0; i<n; i++) bdiag[i] += bi[i];
1027   b->diag       = bdiag;
1028   b->ilen       = 0;
1029   b->imax       = 0;
1030   b->row        = isrow;
1031   b->col        = iscol;
1032   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1033   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1034   b->icol       = isicol;
1035   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1036   /* In b structure:  Free imax, ilen, old a, old j.
1037      Allocate bdiag, solve_work, new a, new j */
1038   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1039   b->maxnz             = b->nz = bi[n] ;
1040   (*fact)->factor = FACTOR_LU;
1041   (*fact)->info.factor_mallocs    = reallocs;
1042   (*fact)->info.fill_ratio_given  = f;
1043   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1044 
1045   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1046   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1047 
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #include "src/mat/impls/sbaij/seq/sbaij.h"
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1054 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1055 {
1056   Mat            C = *B;
1057   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1058   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1059   IS             ip=b->row;
1060   PetscErrorCode ierr;
1061   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1062   PetscInt       *ai=a->i,*aj=a->j;
1063   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1064   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1065   PetscReal      zeropivot,rs,shiftnz;
1066   PetscTruth     shiftpd;
1067   ChShift_Ctx    sctx;
1068   PetscInt       newshift;
1069 
1070   PetscFunctionBegin;
1071   shiftnz   = info->shiftnz;
1072   shiftpd   = info->shiftpd;
1073   zeropivot = info->zeropivot;
1074 
1075   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1076 
1077   /* initialization */
1078   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1079   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1080   jl   = il + mbs;
1081   rtmp = (MatScalar*)(jl + mbs);
1082 
1083   sctx.shift_amount = 0;
1084   sctx.nshift       = 0;
1085   do {
1086     sctx.chshift = PETSC_FALSE;
1087     for (i=0; i<mbs; i++) {
1088       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1089     }
1090 
1091     for (k = 0; k<mbs; k++){
1092       bval = ba + bi[k];
1093       /* initialize k-th row by the perm[k]-th row of A */
1094       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1095       for (j = jmin; j < jmax; j++){
1096         col = rip[aj[j]];
1097         if (col >= k){ /* only take upper triangular entry */
1098           rtmp[col] = aa[j];
1099           *bval++  = 0.0; /* for in-place factorization */
1100         }
1101       }
1102       /* shift the diagonal of the matrix */
1103       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1104 
1105       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1106       dk = rtmp[k];
1107       i = jl[k]; /* first row to be added to k_th row  */
1108 
1109       while (i < k){
1110         nexti = jl[i]; /* next row to be added to k_th row */
1111 
1112         /* compute multiplier, update diag(k) and U(i,k) */
1113         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1114         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1115         dk += uikdi*ba[ili];
1116         ba[ili] = uikdi; /* -U(i,k) */
1117 
1118         /* add multiple of row i to k-th row */
1119         jmin = ili + 1; jmax = bi[i+1];
1120         if (jmin < jmax){
1121           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1122           /* update il and jl for row i */
1123           il[i] = jmin;
1124           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1125         }
1126         i = nexti;
1127       }
1128 
1129       /* shift the diagonals when zero pivot is detected */
1130       /* compute rs=sum of abs(off-diagonal) */
1131       rs   = 0.0;
1132       jmin = bi[k]+1;
1133       nz   = bi[k+1] - jmin;
1134       if (nz){
1135         bcol = bj + jmin;
1136         while (nz--){
1137           rs += PetscAbsScalar(rtmp[*bcol]);
1138           bcol++;
1139         }
1140       }
1141 
1142       sctx.rs = rs;
1143       sctx.pv = dk;
1144       ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
1145       if (newshift == 1){
1146         break;    /* sctx.shift_amount is updated */
1147       } else if (newshift == -1){
1148         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1149       }
1150 
1151       /* copy data into U(k,:) */
1152       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1153       jmin = bi[k]+1; jmax = bi[k+1];
1154       if (jmin < jmax) {
1155         for (j=jmin; j<jmax; j++){
1156           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1157         }
1158         /* add the k-th row into il and jl */
1159         il[k] = jmin;
1160         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1161       }
1162     }
1163   } while (sctx.chshift);
1164   ierr = PetscFree(il);CHKERRQ(ierr);
1165 
1166   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1167   C->factor       = FACTOR_CHOLESKY;
1168   C->assembled    = PETSC_TRUE;
1169   C->preallocated = PETSC_TRUE;
1170   ierr = PetscLogFlops(C->m);CHKERRQ(ierr);
1171   if (sctx.nshift){
1172     if (shiftnz) {
1173       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1174     } else if (shiftpd) {
1175       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1176     }
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1183 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1184 {
1185   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1186   Mat_SeqSBAIJ   *b;
1187   Mat            B;
1188   PetscErrorCode ierr;
1189   PetscTruth     perm_identity;
1190   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1191   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1192   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1193   PetscInt       ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1194   PetscReal      fill=info->fill,levels=info->levels;
1195   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1196   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1197   PetscBT        lnkbt;
1198 
1199   PetscFunctionBegin;
1200   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1201   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1202 
1203   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1204   ui[0] = 0;
1205 
1206   /* special case that simply copies fill pattern */
1207   if (!levels && perm_identity) {
1208     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1209     for (i=0; i<am; i++) {
1210       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1211     }
1212     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1213     cols = uj;
1214     for (i=0; i<am; i++) {
1215       aj    = a->j + a->diag[i];
1216       ncols = ui[i+1] - ui[i];
1217       for (j=0; j<ncols; j++) *cols++ = *aj++;
1218     }
1219   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1220     /* initialization */
1221     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1222 
1223     /* jl: linked list for storing indices of the pivot rows
1224        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1225     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1226     il         = jl + am;
1227     uj_ptr     = (PetscInt**)(il + am);
1228     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1229     for (i=0; i<am; i++){
1230       jl[i] = am; il[i] = 0;
1231     }
1232 
1233     /* create and initialize a linked list for storing column indices of the active row k */
1234     nlnk = am + 1;
1235     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1236 
1237     /* initial FreeSpace size is fill*(ai[am]+1) */
1238     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1239     current_space = free_space;
1240     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1241     current_space_lvl = free_space_lvl;
1242 
1243     for (k=0; k<am; k++){  /* for each active row k */
1244       /* initialize lnk by the column indices of row rip[k] of A */
1245       nzk   = 0;
1246       ncols = ai[rip[k]+1] - ai[rip[k]];
1247       ncols_upper = 0;
1248       for (j=0; j<ncols; j++){
1249         i = *(aj + ai[rip[k]] + j);
1250         if (rip[i] >= k){ /* only take upper triangular entry */
1251           ajtmp[ncols_upper] = i;
1252           ncols_upper++;
1253         }
1254       }
1255       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1256       nzk += nlnk;
1257 
1258       /* update lnk by computing fill-in for each pivot row to be merged in */
1259       prow = jl[k]; /* 1st pivot row */
1260 
1261       while (prow < k){
1262         nextprow = jl[prow];
1263 
1264         /* merge prow into k-th row */
1265         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1266         jmax = ui[prow+1];
1267         ncols = jmax-jmin;
1268         i     = jmin - ui[prow];
1269         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1270         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1271         j     = *(uj - 1);
1272         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1273         nzk += nlnk;
1274 
1275         /* update il and jl for prow */
1276         if (jmin < jmax){
1277           il[prow] = jmin;
1278           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1279         }
1280         prow = nextprow;
1281       }
1282 
1283       /* if free space is not available, make more free space */
1284       if (current_space->local_remaining<nzk) {
1285         i = am - k + 1; /* num of unfactored rows */
1286         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1287         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1288         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1289         reallocs++;
1290       }
1291 
1292       /* copy data into free_space and free_space_lvl, then initialize lnk */
1293       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1294 
1295       /* add the k-th row into il and jl */
1296       if (nzk > 1){
1297         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1298         jl[k] = jl[i]; jl[i] = k;
1299         il[k] = ui[k] + 1;
1300       }
1301       uj_ptr[k]     = current_space->array;
1302       uj_lvl_ptr[k] = current_space_lvl->array;
1303 
1304       current_space->array           += nzk;
1305       current_space->local_used      += nzk;
1306       current_space->local_remaining -= nzk;
1307 
1308       current_space_lvl->array           += nzk;
1309       current_space_lvl->local_used      += nzk;
1310       current_space_lvl->local_remaining -= nzk;
1311 
1312       ui[k+1] = ui[k] + nzk;
1313     }
1314 
1315     if (ai[am] != 0) {
1316       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1317       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1318       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1319       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1320     } else {
1321       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1322     }
1323 
1324     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1325     ierr = PetscFree(jl);CHKERRQ(ierr);
1326     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1327 
1328     /* destroy list of free space and other temporary array(s) */
1329     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1330     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1331     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1332     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1333 
1334   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1335 
1336   /* put together the new matrix in MATSEQSBAIJ format */
1337   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1338   B = *fact;
1339   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1340   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1341 
1342   b    = (Mat_SeqSBAIJ*)B->data;
1343   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1344   b->singlemalloc = PETSC_FALSE;
1345   /* the next line frees the default space generated by the Create() */
1346   ierr = PetscFree(b->a);CHKERRQ(ierr);
1347   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1348   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1349   b->j    = uj;
1350   b->i    = ui;
1351   b->diag = 0;
1352   b->ilen = 0;
1353   b->imax = 0;
1354   b->row  = perm;
1355   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1356   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1357   b->icol = perm;
1358   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1359   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1360   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1361   b->maxnz = b->nz = ui[am];
1362 
1363   B->factor                 = FACTOR_CHOLESKY;
1364   B->info.factor_mallocs    = reallocs;
1365   B->info.fill_ratio_given  = fill;
1366   if (ai[am] != 0) {
1367     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1368   } else {
1369     B->info.fill_ratio_needed = 0.0;
1370   }
1371   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1372   if (perm_identity){
1373     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1374     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1375   }
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1381 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1382 {
1383   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1384   Mat_SeqSBAIJ   *b;
1385   Mat            B;
1386   PetscErrorCode ierr;
1387   PetscTruth     perm_identity;
1388   PetscReal      fill = info->fill;
1389   PetscInt       *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1390   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1391   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1392   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1393   PetscBT        lnkbt;
1394   IS             iperm;
1395 
1396   PetscFunctionBegin;
1397   /* check whether perm is the identity mapping */
1398   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1399   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1400 
1401   if (!perm_identity){
1402     /* check if perm is symmetric! */
1403     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1404     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1405     for (i=0; i<am; i++) {
1406       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1407     }
1408     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1409     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1410   }
1411 
1412   /* initialization */
1413   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1414   ui[0] = 0;
1415 
1416   /* jl: linked list for storing indices of the pivot rows
1417      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1418   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1419   il     = jl + am;
1420   cols   = il + am;
1421   ui_ptr = (PetscInt**)(cols + am);
1422   for (i=0; i<am; i++){
1423     jl[i] = am; il[i] = 0;
1424   }
1425 
1426   /* create and initialize a linked list for storing column indices of the active row k */
1427   nlnk = am + 1;
1428   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1429 
1430   /* initial FreeSpace size is fill*(ai[am]+1) */
1431   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1432   current_space = free_space;
1433 
1434   for (k=0; k<am; k++){  /* for each active row k */
1435     /* initialize lnk by the column indices of row rip[k] of A */
1436     nzk   = 0;
1437     ncols = ai[rip[k]+1] - ai[rip[k]];
1438     ncols_upper = 0;
1439     for (j=0; j<ncols; j++){
1440       i = rip[*(aj + ai[rip[k]] + j)];
1441       if (i >= k){ /* only take upper triangular entry */
1442         cols[ncols_upper] = i;
1443         ncols_upper++;
1444       }
1445     }
1446     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1447     nzk += nlnk;
1448 
1449     /* update lnk by computing fill-in for each pivot row to be merged in */
1450     prow = jl[k]; /* 1st pivot row */
1451 
1452     while (prow < k){
1453       nextprow = jl[prow];
1454       /* merge prow into k-th row */
1455       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1456       jmax = ui[prow+1];
1457       ncols = jmax-jmin;
1458       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1459       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1460       nzk += nlnk;
1461 
1462       /* update il and jl for prow */
1463       if (jmin < jmax){
1464         il[prow] = jmin;
1465         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1466       }
1467       prow = nextprow;
1468     }
1469 
1470     /* if free space is not available, make more free space */
1471     if (current_space->local_remaining<nzk) {
1472       i = am - k + 1; /* num of unfactored rows */
1473       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1474       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1475       reallocs++;
1476     }
1477 
1478     /* copy data into free space, then initialize lnk */
1479     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1480 
1481     /* add the k-th row into il and jl */
1482     if (nzk-1 > 0){
1483       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1484       jl[k] = jl[i]; jl[i] = k;
1485       il[k] = ui[k] + 1;
1486     }
1487     ui_ptr[k] = current_space->array;
1488     current_space->array           += nzk;
1489     current_space->local_used      += nzk;
1490     current_space->local_remaining -= nzk;
1491 
1492     ui[k+1] = ui[k] + nzk;
1493   }
1494 
1495   if (ai[am] != 0) {
1496     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1497     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1498     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1499     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1500   } else {
1501      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1502   }
1503 
1504   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1505   ierr = PetscFree(jl);CHKERRQ(ierr);
1506 
1507   /* destroy list of free space and other temporary array(s) */
1508   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1509   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1510   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1511 
1512   /* put together the new matrix in MATSEQSBAIJ format */
1513   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1514   B    = *fact;
1515   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1516   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1517 
1518   b = (Mat_SeqSBAIJ*)B->data;
1519   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1520   b->singlemalloc = PETSC_FALSE;
1521   /* the next line frees the default space generated by the Create() */
1522   ierr = PetscFree(b->a);CHKERRQ(ierr);
1523   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1524   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1525   b->j    = uj;
1526   b->i    = ui;
1527   b->diag = 0;
1528   b->ilen = 0;
1529   b->imax = 0;
1530   b->row  = perm;
1531   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1532   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1533   b->icol = perm;
1534   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1535   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1536   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1537   b->maxnz = b->nz = ui[am];
1538 
1539   B->factor                 = FACTOR_CHOLESKY;
1540   B->info.factor_mallocs    = reallocs;
1541   B->info.fill_ratio_given  = fill;
1542   if (ai[am] != 0) {
1543     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1544   } else {
1545     B->info.fill_ratio_needed = 0.0;
1546   }
1547   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1548   if (perm_identity){
1549     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1550     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1551   }
1552   PetscFunctionReturn(0);
1553 }
1554