xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 540703ac956e6d314e3cfa00dc59d2fd52f10e40)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9 {
10   PetscFunctionBegin;
11 
12   SETERRQ(PETSC_ERR_SUP,"Code not written");
13 #if !defined(PETSC_USE_DEBUG)
14   PetscFunctionReturn(0);
15 #endif
16 }
17 
18 
19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
21 
22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
28   /* ------------------------------------------------------------
29 
30           This interface was contribed by Tony Caola
31 
32      This routine is an interface to the pivoting drop-tolerance
33      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34      SPARSEKIT2.
35 
36      The SPARSEKIT2 routines used here are covered by the GNU
37      copyright; see the file gnu in this directory.
38 
39      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40      help in getting this routine ironed out.
41 
42      The major drawback to this routine is that if info->fill is
43      not large enough it fails rather than allocating more space;
44      this can be fixed by hacking/improving the f2c version of
45      Yousef Saad's code.
46 
47      ------------------------------------------------------------
48 */
49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50 {
51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
52   PetscFunctionBegin;
53   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
54   You can obtain the drop tolerance routines by installing PETSc from\n\
55   www.mcs.anl.gov/petsc\n");
56 #else
57   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
58   IS             iscolf,isicol,isirow;
59   PetscTruth     reorder;
60   PetscErrorCode ierr,sierr;
61   PetscInt       *c,*r,*ic,i,n = A->m;
62   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63   PetscInt       *ordcol,*iwk,*iperm,*jw;
64   PetscInt       jmax,lfill,job,*o_i,*o_j;
65   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66   PetscReal      af;
67 
68   PetscFunctionBegin;
69 
70   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
71   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
72   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
73   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
74   lfill   = (PetscInt)(info->dtcount/2.0);
75   jmax    = (PetscInt)(info->fill*a->nz);
76 
77 
78   /* ------------------------------------------------------------
79      If reorder=.TRUE., then the original matrix has to be
80      reordered to reflect the user selected ordering scheme, and
81      then de-reordered so it is in it's original format.
82      Because Saad's dperm() is NOT in place, we have to copy
83      the original matrix and allocate more storage. . .
84      ------------------------------------------------------------
85   */
86 
87   /* set reorder to true if either isrow or iscol is not identity */
88   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
89   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
90   reorder = PetscNot(reorder);
91 
92 
93   /* storage for ilu factor */
94   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
95   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
97   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
98 
99   /* ------------------------------------------------------------
100      Make sure that everything is Fortran formatted (1-Based)
101      ------------------------------------------------------------
102   */
103   for (i=old_i[0];i<old_i[n];i++) {
104     old_j[i]++;
105   }
106   for(i=0;i<n+1;i++) {
107     old_i[i]++;
108   };
109 
110 
111   if (reorder) {
112     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
113     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
114     for(i=0;i<n;i++) {
115       r[i]  = r[i]+1;
116       c[i]  = c[i]+1;
117     }
118     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
119     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
121     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
122     for (i=0;i<n;i++) {
123       r[i]  = r[i]-1;
124       c[i]  = c[i]-1;
125     }
126     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
127     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
128     o_a = old_a2;
129     o_j = old_j2;
130     o_i = old_i2;
131   } else {
132     o_a = old_a;
133     o_j = old_j;
134     o_i = old_i;
135   }
136 
137   /* ------------------------------------------------------------
138      Call Saad's ilutp() routine to generate the factorization
139      ------------------------------------------------------------
140   */
141 
142   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
144   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
145 
146   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
147   if (sierr) {
148     switch (sierr) {
149       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
150       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
152       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
153       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
154       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
155     }
156   }
157 
158   ierr = PetscFree(w);CHKERRQ(ierr);
159   ierr = PetscFree(jw);CHKERRQ(ierr);
160 
161   /* ------------------------------------------------------------
162      Saad's routine gives the result in Modified Sparse Row (msr)
163      Convert to Compressed Sparse Row format (csr)
164      ------------------------------------------------------------
165   */
166 
167   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
168   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
169 
170   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
171 
172   ierr = PetscFree(iwk);CHKERRQ(ierr);
173   ierr = PetscFree(wk);CHKERRQ(ierr);
174 
175   if (reorder) {
176     ierr = PetscFree(old_a2);CHKERRQ(ierr);
177     ierr = PetscFree(old_j2);CHKERRQ(ierr);
178     ierr = PetscFree(old_i2);CHKERRQ(ierr);
179   } else {
180     /* fix permutation of old_j that the factorization introduced */
181     for (i=old_i[0]; i<old_i[n]; i++) {
182       old_j[i-1] = iperm[old_j[i-1]-1];
183     }
184   }
185 
186   /* get rid of the shift to indices starting at 1 */
187   for (i=0; i<n+1; i++) {
188     old_i[i]--;
189   }
190   for (i=old_i[0];i<old_i[n];i++) {
191     old_j[i]--;
192   }
193 
194   /* Make the factored matrix 0-based */
195   for (i=0; i<n+1; i++) {
196     new_i[i]--;
197   }
198   for (i=new_i[0];i<new_i[n];i++) {
199     new_j[i]--;
200   }
201 
202   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203   /*-- permute the right-hand-side and solution vectors           --*/
204   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
205   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
206   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
207   for(i=0; i<n; i++) {
208     ordcol[i] = ic[iperm[i]-1];
209   };
210   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
211   ierr = ISDestroy(isicol);CHKERRQ(ierr);
212 
213   ierr = PetscFree(iperm);CHKERRQ(ierr);
214 
215   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
216   ierr = PetscFree(ordcol);CHKERRQ(ierr);
217 
218   /*----- put together the new matrix -----*/
219 
220   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
221   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
222   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
223   (*fact)->factor    = FACTOR_LU;
224   (*fact)->assembled = PETSC_TRUE;
225 
226   b = (Mat_SeqAIJ*)(*fact)->data;
227   ierr = PetscFree(b->imax);CHKERRQ(ierr);
228   b->sorted        = PETSC_FALSE;
229   b->singlemalloc  = PETSC_FALSE;
230   /* the next line frees the default space generated by the MatCreate() */
231   ierr             = PetscFree(b->a);CHKERRQ(ierr);
232   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
247 
248   /* check out for identical nodes. If found, use inode functions */
249   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
250 
251   af = ((double)b->nz)/((double)a->nz) + .001;
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 /*
262     Factorization code for AIJ format.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267 {
268   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
269   IS             isicol;
270   PetscErrorCode ierr;
271   PetscInt       *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
272   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
274   PetscReal      f;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
284   ainew[0] = 0;
285   /* don't know how many column pointers are needed so estimate */
286   f = info->fill;
287   jmax  = (PetscInt)(f*ai[n]+1);
288   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
289   /* fill is a linked list of nonzeros in active row */
290   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
291   im   = fill + n + 1;
292   /* idnew is location of diagonal in factor */
293   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
294   idnew[0] = 0;
295 
296   for (i=0; i<n; i++) {
297     /* first copy previous fill into linked list */
298     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
299     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300     ajtmp   = aj + ai[r[i]];
301     fill[n] = n;
302     while (nz--) {
303       fm  = n;
304       idx = ic[*ajtmp++];
305       do {
306         m  = fm;
307         fm = fill[m];
308       } while (fm < idx);
309       fill[m]   = idx;
310       fill[idx] = fm;
311     }
312     row = fill[n];
313     while (row < i) {
314       ajtmp = ajnew + idnew[row] + 1;
315       nzbd  = 1 + idnew[row] - ainew[row];
316       nz    = im[row] - nzbd;
317       fm    = row;
318       while (nz-- > 0) {
319         idx = *ajtmp++ ;
320         nzbd++;
321         if (idx == i) im[row] = nzbd;
322         do {
323           m  = fm;
324           fm = fill[m];
325         } while (fm < idx);
326         if (fm != idx) {
327           fill[m]   = idx;
328           fill[idx] = fm;
329           fm        = idx;
330           nnz++;
331         }
332       }
333       row = fill[row];
334     }
335     /* copy new filled row into permanent storage */
336     ainew[i+1] = ainew[i] + nnz;
337     if (ainew[i+1] > jmax) {
338       /* estimate how much additional space we will need */
339       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
340       /* just double the memory each time */
341       PetscInt maxadd = jmax;
342       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
343       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
344       jmax += maxadd;
345 
346       /* allocate a longer ajnew */
347       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
348       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr);
349       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
350       ajnew = ajtmp;
351       reallocs++; /* count how many times we realloc */
352     }
353     ajtmp = ajnew + ainew[i];
354     fm    = fill[n];
355     nzi   = 0;
356     im[i] = nnz;
357     while (nnz--) {
358       if (fm < i) nzi++;
359       *ajtmp++ = fm ;
360       fm       = fill[fm];
361     }
362     idnew[i] = ainew[i] + nzi;
363   }
364   if (ai[n] != 0) {
365     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
366     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
367     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
368     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
369     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
370   } else {
371     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
372   }
373 
374   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
375   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
376 
377   ierr = PetscFree(fill);CHKERRQ(ierr);
378 
379   /* put together the new matrix */
380   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
381   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
382   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
383   PetscLogObjectParent(*B,isicol);
384   b    = (Mat_SeqAIJ*)(*B)->data;
385   ierr = PetscFree(b->imax);CHKERRQ(ierr);
386   b->singlemalloc = PETSC_FALSE;
387   /* the next line frees the default space generated by the Create() */
388   ierr = PetscFree(b->a);CHKERRQ(ierr);
389   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
390   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
391   b->j          = ajnew;
392   b->i          = ainew;
393   b->diag       = idnew;
394   b->ilen       = 0;
395   b->imax       = 0;
396   b->row        = isrow;
397   b->col        = iscol;
398   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
399   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
400   b->icol       = isicol;
401   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
402   /* In b structure:  Free imax, ilen, old a, old j.
403      Allocate idnew, solve_work, new a, new j */
404   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
405   b->maxnz = b->nz = ainew[n] ;
406 
407   (*B)->factor                 =  FACTOR_LU;
408   (*B)->info.factor_mallocs    = reallocs;
409   (*B)->info.fill_ratio_given  = f;
410   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
411   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
412 
413   if (ai[n] != 0) {
414     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
415   } else {
416     (*B)->info.fill_ratio_needed = 0.0;
417   }
418   PetscFunctionReturn(0);
419 }
420 /* ----------------------------------------------------------- */
421 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
425 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
426 {
427   Mat            C=*B;
428   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
429   IS             isrow = b->row,isicol = b->icol;
430   PetscErrorCode ierr;
431   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
432   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
433   PetscInt       *diag_offset = b->diag,diag,*pj;
434   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
435   PetscReal      zeropivot,rs,d,shift_nz;
436   PetscReal      row_shift,shift_top=0.;
437   PetscTruth     shift_pd;
438   LUShift_Ctx    sctx;
439   PetscInt       newshift;
440 
441   PetscFunctionBegin;
442   shift_nz  = info->shiftnz;
443   shift_pd  = info->shiftpd;
444   zeropivot = info->zeropivot;
445 
446   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
447   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
448   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
449   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
450   rtmps = rtmp; ics = ic;
451 
452   if (!a->diag) {
453     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
454   }
455   /* if both shift schemes are chosen by user, only use shift_pd */
456   if (shift_pd && shift_nz) shift_nz = 0.0;
457   if (shift_pd) { /* set shift_top=max{row_shift} */
458     PetscInt *aai = a->i,*ddiag = a->diag;
459     shift_top = 0;
460     for (i=0; i<n; i++) {
461       d = PetscAbsScalar((a->a)[ddiag[i]]);
462       /* calculate amt of shift needed for this row */
463       if (d<=0) {
464         row_shift = 0;
465       } else {
466         row_shift = -2*d;
467       }
468       v  = a->a+aai[i];
469       nz = aai[i+1] - aai[i];
470       for (j=0; j<nz; j++)
471 	row_shift += PetscAbsScalar(v[j]);
472       if (row_shift>shift_top) shift_top = row_shift;
473     }
474   }
475 
476   sctx.shift_amount = 0;
477   sctx.shift_top    = shift_top;
478   sctx.nshift       = 0;
479   sctx.nshift_max   = 5;
480   sctx.shift_lo     = 0.;
481   sctx.shift_hi     = 1.;
482   do {
483     sctx.lushift = PETSC_FALSE;
484     for (i=0; i<n; i++){
485       nz    = bi[i+1] - bi[i];
486       bjtmp = bj + bi[i];
487       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
488 
489       /* load in initial (unfactored row) */
490       nz    = a->i[r[i]+1] - a->i[r[i]];
491       ajtmp = a->j + a->i[r[i]];
492       v     = a->a + a->i[r[i]];
493       for (j=0; j<nz; j++) {
494         rtmp[ics[ajtmp[j]]] = v[j];
495       }
496       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
497 
498       row = *bjtmp++;
499       while  (row < i) {
500         pc = rtmp + row;
501         if (*pc != 0.0) {
502           pv         = b->a + diag_offset[row];
503           pj         = b->j + diag_offset[row] + 1;
504           multiplier = *pc / *pv++;
505           *pc        = multiplier;
506           nz         = bi[row+1] - diag_offset[row] - 1;
507           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
508           PetscLogFlops(2*nz);
509         }
510         row = *bjtmp++;
511       }
512       /* finished row so stick it into b->a */
513       pv   = b->a + bi[i] ;
514       pj   = b->j + bi[i] ;
515       nz   = bi[i+1] - bi[i];
516       diag = diag_offset[i] - bi[i];
517       rs   = 0.0;
518       for (j=0; j<nz; j++) {
519         pv[j] = rtmps[pj[j]];
520         if (j != diag) rs += PetscAbsScalar(pv[j]);
521       }
522 
523       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
524       sctx.rs  = rs;
525       sctx.pv  = pv[diag];
526       ierr = Mat_LUCheckShift(info,&sctx,&newshift);CHKERRQ(ierr);
527       if (newshift == 1){
528         break;    /* sctx.shift_amount is updated */
529       } else if (newshift == -1){
530         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
531       }
532     }
533 
534     if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
535       /*
536        * if no shift in this attempt & shifting & started shifting & can refine,
537        * then try lower shift
538        */
539       sctx.shift_hi       = info->shift_fraction;
540       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
541       sctx.shift_amount   = info->shift_fraction * sctx.shift_top;
542       sctx.lushift        = PETSC_TRUE;
543       sctx.nshift++;
544     }
545   } while (sctx.lushift);
546 
547   /* invert diagonal entries for simplier triangular solves */
548   for (i=0; i<n; i++) {
549     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
550   }
551 
552   ierr = PetscFree(rtmp);CHKERRQ(ierr);
553   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
554   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
555   C->factor = FACTOR_LU;
556   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
557   C->assembled = PETSC_TRUE;
558   PetscLogFlops(C->n);
559   if (sctx.nshift){
560     if (shift_nz) {
561       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
562     } else if (shift_pd) {
563       b->lu_shift_fraction = info->shift_fraction;
564       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift);
565     }
566   }
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNCT__
571 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
572 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
573 {
574   PetscFunctionBegin;
575   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
576   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
577   PetscFunctionReturn(0);
578 }
579 
580 
581 /* ----------------------------------------------------------- */
582 #undef __FUNCT__
583 #define __FUNCT__ "MatLUFactor_SeqAIJ"
584 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
585 {
586   PetscErrorCode ierr;
587   Mat            C;
588 
589   PetscFunctionBegin;
590   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
591   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
592   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
593   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
594   PetscFunctionReturn(0);
595 }
596 /* ----------------------------------------------------------- */
597 #undef __FUNCT__
598 #define __FUNCT__ "MatSolve_SeqAIJ"
599 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
600 {
601   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
602   IS             iscol = a->col,isrow = a->row;
603   PetscErrorCode ierr;
604   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
605   PetscInt       nz,*rout,*cout;
606   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
607 
608   PetscFunctionBegin;
609   if (!n) PetscFunctionReturn(0);
610 
611   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
612   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
613   tmp  = a->solve_work;
614 
615   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
616   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
617 
618   /* forward solve the lower triangular */
619   tmp[0] = b[*r++];
620   tmps   = tmp;
621   for (i=1; i<n; i++) {
622     v   = aa + ai[i] ;
623     vi  = aj + ai[i] ;
624     nz  = a->diag[i] - ai[i];
625     sum = b[*r++];
626     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
627     tmp[i] = sum;
628   }
629 
630   /* backward solve the upper triangular */
631   for (i=n-1; i>=0; i--){
632     v   = aa + a->diag[i] + 1;
633     vi  = aj + a->diag[i] + 1;
634     nz  = ai[i+1] - a->diag[i] - 1;
635     sum = tmp[i];
636     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
637     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
638   }
639 
640   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
641   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
642   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
643   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
644   PetscLogFlops(2*a->nz - A->n);
645   PetscFunctionReturn(0);
646 }
647 
648 /* ----------------------------------------------------------- */
649 #undef __FUNCT__
650 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
651 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
652 {
653   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
654   PetscErrorCode ierr;
655   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
656   PetscScalar    *x,*b,*aa = a->a;
657 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
658   PetscInt       adiag_i,i,*vi,nz,ai_i;
659   PetscScalar    *v,sum;
660 #endif
661 
662   PetscFunctionBegin;
663   if (!n) PetscFunctionReturn(0);
664 
665   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
666   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
667 
668 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
669   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
670 #else
671   /* forward solve the lower triangular */
672   x[0] = b[0];
673   for (i=1; i<n; i++) {
674     ai_i = ai[i];
675     v    = aa + ai_i;
676     vi   = aj + ai_i;
677     nz   = adiag[i] - ai_i;
678     sum  = b[i];
679     while (nz--) sum -= *v++ * x[*vi++];
680     x[i] = sum;
681   }
682 
683   /* backward solve the upper triangular */
684   for (i=n-1; i>=0; i--){
685     adiag_i = adiag[i];
686     v       = aa + adiag_i + 1;
687     vi      = aj + adiag_i + 1;
688     nz      = ai[i+1] - adiag_i - 1;
689     sum     = x[i];
690     while (nz--) sum -= *v++ * x[*vi++];
691     x[i]    = sum*aa[adiag_i];
692   }
693 #endif
694   PetscLogFlops(2*a->nz - A->n);
695   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
696   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
697   PetscFunctionReturn(0);
698 }
699 
700 #undef __FUNCT__
701 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
702 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
703 {
704   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
705   IS             iscol = a->col,isrow = a->row;
706   PetscErrorCode ierr;
707   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
708   PetscInt       nz,*rout,*cout;
709   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
710 
711   PetscFunctionBegin;
712   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
713 
714   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
715   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
716   tmp  = a->solve_work;
717 
718   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
719   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
720 
721   /* forward solve the lower triangular */
722   tmp[0] = b[*r++];
723   for (i=1; i<n; i++) {
724     v   = aa + ai[i] ;
725     vi  = aj + ai[i] ;
726     nz  = a->diag[i] - ai[i];
727     sum = b[*r++];
728     while (nz--) sum -= *v++ * tmp[*vi++ ];
729     tmp[i] = sum;
730   }
731 
732   /* backward solve the upper triangular */
733   for (i=n-1; i>=0; i--){
734     v   = aa + a->diag[i] + 1;
735     vi  = aj + a->diag[i] + 1;
736     nz  = ai[i+1] - a->diag[i] - 1;
737     sum = tmp[i];
738     while (nz--) sum -= *v++ * tmp[*vi++ ];
739     tmp[i] = sum*aa[a->diag[i]];
740     x[*c--] += tmp[i];
741   }
742 
743   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
744   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
745   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
746   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
747   PetscLogFlops(2*a->nz);
748 
749   PetscFunctionReturn(0);
750 }
751 /* -------------------------------------------------------------------*/
752 #undef __FUNCT__
753 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
754 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
755 {
756   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
757   IS             iscol = a->col,isrow = a->row;
758   PetscErrorCode ierr;
759   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
760   PetscInt       nz,*rout,*cout,*diag = a->diag;
761   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
762 
763   PetscFunctionBegin;
764   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
765   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
766   tmp  = a->solve_work;
767 
768   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
769   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
770 
771   /* copy the b into temp work space according to permutation */
772   for (i=0; i<n; i++) tmp[i] = b[c[i]];
773 
774   /* forward solve the U^T */
775   for (i=0; i<n; i++) {
776     v   = aa + diag[i] ;
777     vi  = aj + diag[i] + 1;
778     nz  = ai[i+1] - diag[i] - 1;
779     s1  = tmp[i];
780     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
781     while (nz--) {
782       tmp[*vi++ ] -= (*v++)*s1;
783     }
784     tmp[i] = s1;
785   }
786 
787   /* backward solve the L^T */
788   for (i=n-1; i>=0; i--){
789     v   = aa + diag[i] - 1 ;
790     vi  = aj + diag[i] - 1 ;
791     nz  = diag[i] - ai[i];
792     s1  = tmp[i];
793     while (nz--) {
794       tmp[*vi-- ] -= (*v--)*s1;
795     }
796   }
797 
798   /* copy tmp into x according to permutation */
799   for (i=0; i<n; i++) x[r[i]] = tmp[i];
800 
801   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
802   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
803   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
804   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
805 
806   PetscLogFlops(2*a->nz-A->n);
807   PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
812 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
813 {
814   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
815   IS             iscol = a->col,isrow = a->row;
816   PetscErrorCode ierr;
817   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
818   PetscInt       nz,*rout,*cout,*diag = a->diag;
819   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
820 
821   PetscFunctionBegin;
822   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
823 
824   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
825   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
826   tmp = a->solve_work;
827 
828   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
829   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
830 
831   /* copy the b into temp work space according to permutation */
832   for (i=0; i<n; i++) tmp[i] = b[c[i]];
833 
834   /* forward solve the U^T */
835   for (i=0; i<n; i++) {
836     v   = aa + diag[i] ;
837     vi  = aj + diag[i] + 1;
838     nz  = ai[i+1] - diag[i] - 1;
839     tmp[i] *= *v++;
840     while (nz--) {
841       tmp[*vi++ ] -= (*v++)*tmp[i];
842     }
843   }
844 
845   /* backward solve the L^T */
846   for (i=n-1; i>=0; i--){
847     v   = aa + diag[i] - 1 ;
848     vi  = aj + diag[i] - 1 ;
849     nz  = diag[i] - ai[i];
850     while (nz--) {
851       tmp[*vi-- ] -= (*v--)*tmp[i];
852     }
853   }
854 
855   /* copy tmp into x according to permutation */
856   for (i=0; i<n; i++) x[r[i]] += tmp[i];
857 
858   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
859   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
860   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
861   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
862 
863   PetscLogFlops(2*a->nz);
864   PetscFunctionReturn(0);
865 }
866 /* ----------------------------------------------------------------*/
867 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
868 
869 #undef __FUNCT__
870 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
871 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
872 {
873   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
874   IS             isicol;
875   PetscErrorCode ierr;
876   PetscInt       *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
877   PetscInt       *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
878   PetscInt       *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
879   PetscInt       incrlev,nnz,i,levels,diagonal_fill;
880   PetscTruth     col_identity,row_identity;
881   PetscReal      f;
882 
883   PetscFunctionBegin;
884   f             = info->fill;
885   levels        = (PetscInt)info->levels;
886   diagonal_fill = (PetscInt)info->diagonal_fill;
887   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
888 
889   /* special case that simply copies fill pattern */
890   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
891   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
892   if (!levels && row_identity && col_identity) {
893     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
894     (*fact)->factor = FACTOR_LU;
895     b               = (Mat_SeqAIJ*)(*fact)->data;
896     if (!b->diag) {
897       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
898     }
899     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
900     b->row              = isrow;
901     b->col              = iscol;
902     b->icol             = isicol;
903     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
904     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
905     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
906     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
907     PetscFunctionReturn(0);
908   }
909 
910   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
911   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
912 
913   /* get new row pointers */
914   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
915   ainew[0] = 0;
916   /* don't know how many column pointers are needed so estimate */
917   jmax = (PetscInt)(f*(ai[n]+1));
918   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
919   /* ajfill is level of fill for each fill entry */
920   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr);
921   /* fill is a linked list of nonzeros in active row */
922   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
923   /* im is level for each filled value */
924   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
925   /* dloc is location of diagonal in factor */
926   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr);
927   dloc[0]  = 0;
928   for (prow=0; prow<n; prow++) {
929 
930     /* copy current row into linked list */
931     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
932     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
933     xi      = aj + ai[r[prow]];
934     fill[n] = n;
935     fill[prow] = -1; /* marker to indicate if diagonal exists */
936     while (nz--) {
937       fm  = n;
938       idx = ic[*xi++ ];
939       do {
940         m  = fm;
941         fm = fill[m];
942       } while (fm < idx);
943       fill[m]   = idx;
944       fill[idx] = fm;
945       im[idx]   = 0;
946     }
947 
948     /* make sure diagonal entry is included */
949     if (diagonal_fill && fill[prow] == -1) {
950       fm = n;
951       while (fill[fm] < prow) fm = fill[fm];
952       fill[prow] = fill[fm]; /* insert diagonal into linked list */
953       fill[fm]   = prow;
954       im[prow]   = 0;
955       nzf++;
956       dcount++;
957     }
958 
959     nzi = 0;
960     row = fill[n];
961     while (row < prow) {
962       incrlev = im[row] + 1;
963       nz      = dloc[row];
964       xi      = ajnew  + ainew[row]  + nz + 1;
965       flev    = ajfill + ainew[row]  + nz + 1;
966       nnz     = ainew[row+1] - ainew[row] - nz - 1;
967       fm      = row;
968       while (nnz-- > 0) {
969         idx = *xi++ ;
970         if (*flev + incrlev > levels) {
971           flev++;
972           continue;
973         }
974         do {
975           m  = fm;
976           fm = fill[m];
977         } while (fm < idx);
978         if (fm != idx) {
979           im[idx]   = *flev + incrlev;
980           fill[m]   = idx;
981           fill[idx] = fm;
982           fm        = idx;
983           nzf++;
984         } else {
985           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
986         }
987         flev++;
988       }
989       row = fill[row];
990       nzi++;
991     }
992     /* copy new filled row into permanent storage */
993     ainew[prow+1] = ainew[prow] + nzf;
994     if (ainew[prow+1] > jmax) {
995 
996       /* estimate how much additional space we will need */
997       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
998       /* just double the memory each time */
999       /*  maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1000       PetscInt maxadd = jmax;
1001       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1002       jmax += maxadd;
1003 
1004       /* allocate a longer ajnew and ajfill */
1005       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1006       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1007       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1008       ajnew  = xi;
1009       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1010       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1011       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1012       ajfill = xi;
1013       reallocs++; /* count how many times we realloc */
1014     }
1015     xi          = ajnew + ainew[prow] ;
1016     flev        = ajfill + ainew[prow] ;
1017     dloc[prow]  = nzi;
1018     fm          = fill[n];
1019     while (nzf--) {
1020       *xi++   = fm ;
1021       *flev++ = im[fm];
1022       fm      = fill[fm];
1023     }
1024     /* make sure row has diagonal entry */
1025     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1026       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1027     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1028     }
1029   }
1030   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1031   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1032   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1033   ierr = PetscFree(fill);CHKERRQ(ierr);
1034   ierr = PetscFree(im);CHKERRQ(ierr);
1035 
1036   {
1037     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1038     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1039     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1040     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1041     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1042     if (diagonal_fill) {
1043       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1044     }
1045   }
1046 
1047   /* put together the new matrix */
1048   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1049   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1050   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1051   PetscLogObjectParent(*fact,isicol);
1052   b = (Mat_SeqAIJ*)(*fact)->data;
1053   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1054   b->singlemalloc = PETSC_FALSE;
1055   len = (ainew[n] )*sizeof(PetscScalar);
1056   /* the next line frees the default space generated by the Create() */
1057   ierr = PetscFree(b->a);CHKERRQ(ierr);
1058   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1059   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1060   b->j          = ajnew;
1061   b->i          = ainew;
1062   for (i=0; i<n; i++) dloc[i] += ainew[i];
1063   b->diag       = dloc;
1064   b->ilen       = 0;
1065   b->imax       = 0;
1066   b->row        = isrow;
1067   b->col        = iscol;
1068   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1069   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1070   b->icol       = isicol;
1071   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1072   /* In b structure:  Free imax, ilen, old a, old j.
1073      Allocate dloc, solve_work, new a, new j */
1074   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1075   b->maxnz             = b->nz = ainew[n] ;
1076   (*fact)->factor = FACTOR_LU;
1077   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1078   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1079   (*fact)->info.factor_mallocs    = reallocs;
1080   (*fact)->info.fill_ratio_given  = f;
1081   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #include "src/mat/impls/sbaij/seq/sbaij.h"
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1088 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1089 {
1090   Mat            C = *B;
1091   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1092   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1093   IS             ip=b->row;
1094   PetscErrorCode ierr;
1095   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1096   PetscInt       *ai=a->i,*aj=a->j;
1097   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1098   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1099   PetscReal      zeropivot,rs,shiftnz;
1100   PetscTruth     shiftpd;
1101   ChShift_Ctx    sctx;
1102   PetscInt       newshift;
1103 
1104   PetscFunctionBegin;
1105   shiftnz   = info->shiftnz;
1106   shiftpd   = info->shiftpd;
1107   zeropivot = info->zeropivot;
1108 
1109   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1110 
1111   /* initialization */
1112   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1113   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1114   jl   = il + mbs;
1115   rtmp = (MatScalar*)(jl + mbs);
1116 
1117   sctx.shift_amount = 0;
1118   sctx.nshift       = 0;
1119   do {
1120     sctx.chshift = PETSC_FALSE;
1121     for (i=0; i<mbs; i++) {
1122       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1123     }
1124 
1125     for (k = 0; k<mbs; k++){
1126       bval = ba + bi[k];
1127       /* initialize k-th row by the perm[k]-th row of A */
1128       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1129       for (j = jmin; j < jmax; j++){
1130         col = rip[aj[j]];
1131         if (col >= k){ /* only take upper triangular entry */
1132           rtmp[col] = aa[j];
1133           *bval++  = 0.0; /* for in-place factorization */
1134         }
1135       }
1136       /* shift the diagonal of the matrix */
1137       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1138 
1139       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1140       dk = rtmp[k];
1141       i = jl[k]; /* first row to be added to k_th row  */
1142 
1143       while (i < k){
1144         nexti = jl[i]; /* next row to be added to k_th row */
1145 
1146         /* compute multiplier, update diag(k) and U(i,k) */
1147         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1148         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1149         dk += uikdi*ba[ili];
1150         ba[ili] = uikdi; /* -U(i,k) */
1151 
1152         /* add multiple of row i to k-th row */
1153         jmin = ili + 1; jmax = bi[i+1];
1154         if (jmin < jmax){
1155           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1156           /* update il and jl for row i */
1157           il[i] = jmin;
1158           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1159         }
1160         i = nexti;
1161       }
1162 
1163       /* shift the diagonals when zero pivot is detected */
1164       /* compute rs=sum of abs(off-diagonal) */
1165       rs   = 0.0;
1166       jmin = bi[k]+1;
1167       nz   = bi[k+1] - jmin;
1168       if (nz){
1169         bcol = bj + jmin;
1170         while (nz--){
1171           rs += PetscAbsScalar(rtmp[*bcol++]);
1172         }
1173       }
1174 
1175       sctx.rs = rs;
1176       sctx.pv = dk;
1177       ierr = Mat_CholeskyCheckShift(info,&sctx,&newshift);CHKERRQ(ierr);
1178       if (newshift == 1){
1179         break;    /* sctx.shift_amount is updated */
1180       } else if (newshift == -1){
1181         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1182       }
1183 
1184       /* copy data into U(k,:) */
1185       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1186       jmin = bi[k]+1; jmax = bi[k+1];
1187       if (jmin < jmax) {
1188         for (j=jmin; j<jmax; j++){
1189           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1190         }
1191         /* add the k-th row into il and jl */
1192         il[k] = jmin;
1193         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1194       }
1195     }
1196   } while (sctx.chshift);
1197   ierr = PetscFree(il);CHKERRQ(ierr);
1198 
1199   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1200   C->factor       = FACTOR_CHOLESKY;
1201   C->assembled    = PETSC_TRUE;
1202   C->preallocated = PETSC_TRUE;
1203   PetscLogFlops(C->m);
1204   if (sctx.nshift){
1205     if (shiftnz) {
1206       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1207     } else if (shiftpd) {
1208       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1209     }
1210   }
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 #include "petscbt.h"
1215 #include "src/mat/utils/freespace.h"
1216 #undef __FUNCT__
1217 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1218 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1219 {
1220   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1221   Mat_SeqSBAIJ   *b;
1222   Mat            B;
1223   PetscErrorCode ierr;
1224   PetscTruth     perm_identity;
1225   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1226   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1227   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1228   PetscInt       ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1229   PetscReal      fill=info->fill,levels=info->levels;
1230   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1231   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1232   PetscBT        lnkbt;
1233 
1234   PetscFunctionBegin;
1235   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1236   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1237 
1238   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1239   ui[0] = 0;
1240 
1241   /* special case that simply copies fill pattern */
1242   if (!levels && perm_identity) {
1243     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1244     for (i=0; i<am; i++) {
1245       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1246     }
1247     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1248     cols = uj;
1249     for (i=0; i<am; i++) {
1250       aj    = a->j + a->diag[i];
1251       ncols = ui[i+1] - ui[i];
1252       for (j=0; j<ncols; j++) *cols++ = *aj++;
1253     }
1254   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1255     /* initialization */
1256     ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
1257 
1258     /* jl: linked list for storing indices of the pivot rows
1259        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1260     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1261     il         = jl + am;
1262     uj_ptr     = (PetscInt**)(il + am);
1263     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1264     for (i=0; i<am; i++){
1265       jl[i] = am; il[i] = 0;
1266     }
1267 
1268     /* create and initialize a linked list for storing column indices of the active row k */
1269     nlnk = am + 1;
1270     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1271 
1272     /* initial FreeSpace size is fill*(ai[am]+1) */
1273     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1274     current_space = free_space;
1275     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1276     current_space_lvl = free_space_lvl;
1277 
1278     for (k=0; k<am; k++){  /* for each active row k */
1279       /* initialize lnk by the column indices of row rip[k] of A */
1280       nzk   = 0;
1281       ncols = ai[rip[k]+1] - ai[rip[k]];
1282       ncols_upper = 0;
1283       cols        = cols_lvl + am;
1284       for (j=0; j<ncols; j++){
1285         i = rip[*(aj + ai[rip[k]] + j)];
1286         if (i >= k){ /* only take upper triangular entry */
1287           cols[ncols_upper] = i;
1288           cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
1289           ncols_upper++;
1290         }
1291       }
1292       ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1293       nzk += nlnk;
1294 
1295       /* update lnk by computing fill-in for each pivot row to be merged in */
1296       prow = jl[k]; /* 1st pivot row */
1297 
1298       while (prow < k){
1299         nextprow = jl[prow];
1300 
1301         /* merge prow into k-th row */
1302         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1303         jmax = ui[prow+1];
1304         ncols = jmax-jmin;
1305         i     = jmin - ui[prow];
1306         cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1307         for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1308         ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1309         nzk += nlnk;
1310 
1311         /* update il and jl for prow */
1312         if (jmin < jmax){
1313           il[prow] = jmin;
1314           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1315         }
1316         prow = nextprow;
1317       }
1318 
1319       /* if free space is not available, make more free space */
1320       if (current_space->local_remaining<nzk) {
1321         i = am - k + 1; /* num of unfactored rows */
1322         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1323         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1324         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1325         reallocs++;
1326       }
1327 
1328       /* copy data into free_space and free_space_lvl, then initialize lnk */
1329       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1330 
1331       /* add the k-th row into il and jl */
1332       if (nzk > 1){
1333         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1334         jl[k] = jl[i]; jl[i] = k;
1335         il[k] = ui[k] + 1;
1336       }
1337       uj_ptr[k]     = current_space->array;
1338       uj_lvl_ptr[k] = current_space_lvl->array;
1339 
1340       current_space->array           += nzk;
1341       current_space->local_used      += nzk;
1342       current_space->local_remaining -= nzk;
1343 
1344       current_space_lvl->array           += nzk;
1345       current_space_lvl->local_used      += nzk;
1346       current_space_lvl->local_remaining -= nzk;
1347 
1348       ui[k+1] = ui[k] + nzk;
1349     }
1350 
1351     if (ai[am] != 0) {
1352       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1353       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1354       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1355       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1356     } else {
1357       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1358     }
1359 
1360     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1361     ierr = PetscFree(jl);CHKERRQ(ierr);
1362     ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
1363 
1364     /* destroy list of free space and other temporary array(s) */
1365     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1366     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1367     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1368     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1369 
1370   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1371 
1372   /* put together the new matrix in MATSEQSBAIJ format */
1373   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1374   B = *fact;
1375   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1376   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1377 
1378   b    = (Mat_SeqSBAIJ*)B->data;
1379   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1380   b->singlemalloc = PETSC_FALSE;
1381   /* the next line frees the default space generated by the Create() */
1382   ierr = PetscFree(b->a);CHKERRQ(ierr);
1383   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1384   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1385   b->j    = uj;
1386   b->i    = ui;
1387   b->diag = 0;
1388   b->ilen = 0;
1389   b->imax = 0;
1390   b->row  = perm;
1391   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1392   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1393   b->icol = perm;
1394   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1395   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1396   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1397   b->maxnz = b->nz = ui[am];
1398 
1399   B->factor                 = FACTOR_CHOLESKY;
1400   B->info.factor_mallocs    = reallocs;
1401   B->info.fill_ratio_given  = fill;
1402   if (ai[am] != 0) {
1403     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1404   } else {
1405     B->info.fill_ratio_needed = 0.0;
1406   }
1407   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1408   if (perm_identity){
1409     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1410     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1411   }
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1417 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1418 {
1419   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1420   Mat_SeqSBAIJ   *b;
1421   Mat            B;
1422   PetscErrorCode ierr;
1423   PetscTruth     perm_identity;
1424   PetscReal      fill = info->fill;
1425   PetscInt       *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1426   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1427   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1428   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1429   PetscBT        lnkbt;
1430   IS             iperm;
1431 
1432   PetscFunctionBegin;
1433   /* check whether perm is the identity mapping */
1434   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1435   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1436 
1437   if (!perm_identity){
1438     /* check if perm is symmetric! */
1439     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1440     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1441     for (i=0; i<mbs; i++) {
1442       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1443     }
1444     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1445     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1446   }
1447 
1448   /* initialization */
1449   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1450   ui[0] = 0;
1451 
1452   /* jl: linked list for storing indices of the pivot rows
1453      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
1454   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1455   il     = jl + mbs;
1456   cols   = il + mbs;
1457   ui_ptr = (PetscInt**)(cols + mbs);
1458   for (i=0; i<mbs; i++){
1459     jl[i] = mbs; il[i] = 0;
1460   }
1461 
1462   /* create and initialize a linked list for storing column indices of the active row k */
1463   nlnk = mbs + 1;
1464   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1465 
1466   /* initial FreeSpace size is fill*(ai[mbs]+1) */
1467   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
1468   current_space = free_space;
1469 
1470   for (k=0; k<mbs; k++){  /* for each active row k */
1471     /* initialize lnk by the column indices of row rip[k] of A */
1472     nzk   = 0;
1473     ncols = ai[rip[k]+1] - ai[rip[k]];
1474     ncols_upper = 0;
1475     for (j=0; j<ncols; j++){
1476       i = rip[*(aj + ai[rip[k]] + j)];
1477       if (i >= k){ /* only take upper triangular entry */
1478         cols[ncols_upper] = i;
1479         ncols_upper++;
1480       }
1481     }
1482     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1483     nzk += nlnk;
1484 
1485     /* update lnk by computing fill-in for each pivot row to be merged in */
1486     prow = jl[k]; /* 1st pivot row */
1487 
1488     while (prow < k){
1489       nextprow = jl[prow];
1490       /* merge prow into k-th row */
1491       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
1492       jmax = ui[prow+1];
1493       ncols = jmax-jmin;
1494       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
1495       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1496       nzk += nlnk;
1497 
1498       /* update il and jl for prow */
1499       if (jmin < jmax){
1500         il[prow] = jmin;
1501         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1502       }
1503       prow = nextprow;
1504     }
1505 
1506     /* if free space is not available, make more free space */
1507     if (current_space->local_remaining<nzk) {
1508       i = mbs - k + 1; /* num of unfactored rows */
1509       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1510       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1511       reallocs++;
1512     }
1513 
1514     /* copy data into free space, then initialize lnk */
1515     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1516 
1517     /* add the k-th row into il and jl */
1518     if (nzk-1 > 0){
1519       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1520       jl[k] = jl[i]; jl[i] = k;
1521       il[k] = ui[k] + 1;
1522     }
1523     ui_ptr[k] = current_space->array;
1524     current_space->array           += nzk;
1525     current_space->local_used      += nzk;
1526     current_space->local_remaining -= nzk;
1527 
1528     ui[k+1] = ui[k] + nzk;
1529   }
1530 
1531   if (ai[mbs] != 0) {
1532     PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]);
1533     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1534     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1535     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1536   } else {
1537      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1538   }
1539 
1540   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1541   ierr = PetscFree(jl);CHKERRQ(ierr);
1542 
1543   /* destroy list of free space and other temporary array(s) */
1544   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1545   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1546   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1547 
1548   /* put together the new matrix in MATSEQSBAIJ format */
1549   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
1550   B    = *fact;
1551   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1552   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1553 
1554   b = (Mat_SeqSBAIJ*)B->data;
1555   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1556   b->singlemalloc = PETSC_FALSE;
1557   /* the next line frees the default space generated by the Create() */
1558   ierr = PetscFree(b->a);CHKERRQ(ierr);
1559   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1560   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1561   b->j    = uj;
1562   b->i    = ui;
1563   b->diag = 0;
1564   b->ilen = 0;
1565   b->imax = 0;
1566   b->row  = perm;
1567   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1568   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1569   b->icol = perm;
1570   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1571   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1572   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
1573   b->maxnz = b->nz = ui[mbs];
1574 
1575   B->factor                 = FACTOR_CHOLESKY;
1576   B->info.factor_mallocs    = reallocs;
1577   B->info.fill_ratio_given  = fill;
1578   if (ai[mbs] != 0) {
1579     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
1580   } else {
1581     B->info.fill_ratio_needed = 0.0;
1582   }
1583   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1584   if (perm_identity){
1585     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1586     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1587   }
1588   PetscFunctionReturn(0);
1589 }
1590