xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 413aa8a8f189e76665809ed8984e61113e58f003)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9 {
10   PetscFunctionBegin;
11 
12   SETERRQ(PETSC_ERR_SUP,"Code not written");
13 #if !defined(PETSC_USE_DEBUG)
14   PetscFunctionReturn(0);
15 #endif
16 }
17 
18 
19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
21 
22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
28   /* ------------------------------------------------------------
29 
30           This interface was contribed by Tony Caola
31 
32      This routine is an interface to the pivoting drop-tolerance
33      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34      SPARSEKIT2.
35 
36      The SPARSEKIT2 routines used here are covered by the GNU
37      copyright; see the file gnu in this directory.
38 
39      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40      help in getting this routine ironed out.
41 
42      The major drawback to this routine is that if info->fill is
43      not large enough it fails rather than allocating more space;
44      this can be fixed by hacking/improving the f2c version of
45      Yousef Saad's code.
46 
47      ------------------------------------------------------------
48 */
49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50 {
51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
52   PetscFunctionBegin;
53   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
54   You can obtain the drop tolerance routines by installing PETSc from\n\
55   www.mcs.anl.gov/petsc\n");
56 #else
57   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
58   IS             iscolf,isicol,isirow;
59   PetscTruth     reorder;
60   PetscErrorCode ierr,sierr;
61   PetscInt       *c,*r,*ic,i,n = A->m;
62   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63   PetscInt       *ordcol,*iwk,*iperm,*jw;
64   PetscInt       jmax,lfill,job,*o_i,*o_j;
65   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66   PetscReal      af;
67 
68   PetscFunctionBegin;
69 
70   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
71   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
72   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
73   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
74   lfill   = (PetscInt)(info->dtcount/2.0);
75   jmax    = (PetscInt)(info->fill*a->nz);
76 
77 
78   /* ------------------------------------------------------------
79      If reorder=.TRUE., then the original matrix has to be
80      reordered to reflect the user selected ordering scheme, and
81      then de-reordered so it is in it's original format.
82      Because Saad's dperm() is NOT in place, we have to copy
83      the original matrix and allocate more storage. . .
84      ------------------------------------------------------------
85   */
86 
87   /* set reorder to true if either isrow or iscol is not identity */
88   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
89   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
90   reorder = PetscNot(reorder);
91 
92 
93   /* storage for ilu factor */
94   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
95   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
97   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
98 
99   /* ------------------------------------------------------------
100      Make sure that everything is Fortran formatted (1-Based)
101      ------------------------------------------------------------
102   */
103   for (i=old_i[0];i<old_i[n];i++) {
104     old_j[i]++;
105   }
106   for(i=0;i<n+1;i++) {
107     old_i[i]++;
108   };
109 
110 
111   if (reorder) {
112     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
113     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
114     for(i=0;i<n;i++) {
115       r[i]  = r[i]+1;
116       c[i]  = c[i]+1;
117     }
118     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
119     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
121     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
122     for (i=0;i<n;i++) {
123       r[i]  = r[i]-1;
124       c[i]  = c[i]-1;
125     }
126     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
127     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
128     o_a = old_a2;
129     o_j = old_j2;
130     o_i = old_i2;
131   } else {
132     o_a = old_a;
133     o_j = old_j;
134     o_i = old_i;
135   }
136 
137   /* ------------------------------------------------------------
138      Call Saad's ilutp() routine to generate the factorization
139      ------------------------------------------------------------
140   */
141 
142   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
144   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
145 
146   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
147   if (sierr) {
148     switch (sierr) {
149       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
150       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
152       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
153       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
154       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
155     }
156   }
157 
158   ierr = PetscFree(w);CHKERRQ(ierr);
159   ierr = PetscFree(jw);CHKERRQ(ierr);
160 
161   /* ------------------------------------------------------------
162      Saad's routine gives the result in Modified Sparse Row (msr)
163      Convert to Compressed Sparse Row format (csr)
164      ------------------------------------------------------------
165   */
166 
167   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
168   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
169 
170   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
171 
172   ierr = PetscFree(iwk);CHKERRQ(ierr);
173   ierr = PetscFree(wk);CHKERRQ(ierr);
174 
175   if (reorder) {
176     ierr = PetscFree(old_a2);CHKERRQ(ierr);
177     ierr = PetscFree(old_j2);CHKERRQ(ierr);
178     ierr = PetscFree(old_i2);CHKERRQ(ierr);
179   } else {
180     /* fix permutation of old_j that the factorization introduced */
181     for (i=old_i[0]; i<old_i[n]; i++) {
182       old_j[i-1] = iperm[old_j[i-1]-1];
183     }
184   }
185 
186   /* get rid of the shift to indices starting at 1 */
187   for (i=0; i<n+1; i++) {
188     old_i[i]--;
189   }
190   for (i=old_i[0];i<old_i[n];i++) {
191     old_j[i]--;
192   }
193 
194   /* Make the factored matrix 0-based */
195   for (i=0; i<n+1; i++) {
196     new_i[i]--;
197   }
198   for (i=new_i[0];i<new_i[n];i++) {
199     new_j[i]--;
200   }
201 
202   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203   /*-- permute the right-hand-side and solution vectors           --*/
204   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
205   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
206   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
207   for(i=0; i<n; i++) {
208     ordcol[i] = ic[iperm[i]-1];
209   };
210   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
211   ierr = ISDestroy(isicol);CHKERRQ(ierr);
212 
213   ierr = PetscFree(iperm);CHKERRQ(ierr);
214 
215   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
216   ierr = PetscFree(ordcol);CHKERRQ(ierr);
217 
218   /*----- put together the new matrix -----*/
219 
220   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
221   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
222   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
223   (*fact)->factor    = FACTOR_LU;
224   (*fact)->assembled = PETSC_TRUE;
225 
226   b = (Mat_SeqAIJ*)(*fact)->data;
227   ierr = PetscFree(b->imax);CHKERRQ(ierr);
228   b->sorted        = PETSC_FALSE;
229   b->singlemalloc  = PETSC_FALSE;
230   /* the next line frees the default space generated by the MatCreate() */
231   ierr             = PetscFree(b->a);CHKERRQ(ierr);
232   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
247 
248   /* check out for identical nodes. If found, use inode functions */
249   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
250 
251   af = ((double)b->nz)/((double)a->nz) + .001;
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 /*
262     Factorization code for AIJ format.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267 {
268   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
269   IS             isicol;
270   PetscErrorCode ierr;
271   PetscInt       *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
272   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
274   PetscReal      f;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
284   ainew[0] = 0;
285   /* don't know how many column pointers are needed so estimate */
286   f = info->fill;
287   jmax  = (PetscInt)(f*ai[n]+1);
288   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
289   /* fill is a linked list of nonzeros in active row */
290   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
291   im   = fill + n + 1;
292   /* idnew is location of diagonal in factor */
293   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
294   idnew[0] = 0;
295 
296   for (i=0; i<n; i++) {
297     /* first copy previous fill into linked list */
298     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
299     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300     ajtmp   = aj + ai[r[i]];
301     fill[n] = n;
302     while (nz--) {
303       fm  = n;
304       idx = ic[*ajtmp++];
305       do {
306         m  = fm;
307         fm = fill[m];
308       } while (fm < idx);
309       fill[m]   = idx;
310       fill[idx] = fm;
311     }
312     row = fill[n];
313     while (row < i) {
314       ajtmp = ajnew + idnew[row] + 1;
315       nzbd  = 1 + idnew[row] - ainew[row];
316       nz    = im[row] - nzbd;
317       fm    = row;
318       while (nz-- > 0) {
319         idx = *ajtmp++ ;
320         nzbd++;
321         if (idx == i) im[row] = nzbd;
322         do {
323           m  = fm;
324           fm = fill[m];
325         } while (fm < idx);
326         if (fm != idx) {
327           fill[m]   = idx;
328           fill[idx] = fm;
329           fm        = idx;
330           nnz++;
331         }
332       }
333       row = fill[row];
334     }
335     /* copy new filled row into permanent storage */
336     ainew[i+1] = ainew[i] + nnz;
337     if (ainew[i+1] > jmax) {
338       /* estimate how much additional space we will need */
339       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
340       /* just double the memory each time */
341       PetscInt maxadd = jmax;
342       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
343       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
344       jmax += maxadd;
345 
346       /* allocate a longer ajnew */
347       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
348       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr);
349       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
350       ajnew = ajtmp;
351       reallocs++; /* count how many times we realloc */
352     }
353     ajtmp = ajnew + ainew[i];
354     fm    = fill[n];
355     nzi   = 0;
356     im[i] = nnz;
357     while (nnz--) {
358       if (fm < i) nzi++;
359       *ajtmp++ = fm ;
360       fm       = fill[fm];
361     }
362     idnew[i] = ainew[i] + nzi;
363   }
364   if (ai[n] != 0) {
365     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
366     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
367     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
368     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
369     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
370   } else {
371     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
372   }
373 
374   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
375   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
376 
377   ierr = PetscFree(fill);CHKERRQ(ierr);
378 
379   /* put together the new matrix */
380   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
381   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
382   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
383   PetscLogObjectParent(*B,isicol);
384   b    = (Mat_SeqAIJ*)(*B)->data;
385   ierr = PetscFree(b->imax);CHKERRQ(ierr);
386   b->singlemalloc = PETSC_FALSE;
387   /* the next line frees the default space generated by the Create() */
388   ierr = PetscFree(b->a);CHKERRQ(ierr);
389   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
390   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
391   b->j          = ajnew;
392   b->i          = ainew;
393   b->diag       = idnew;
394   b->ilen       = 0;
395   b->imax       = 0;
396   b->row        = isrow;
397   b->col        = iscol;
398   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
399   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
400   b->icol       = isicol;
401   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
402   /* In b structure:  Free imax, ilen, old a, old j.
403      Allocate idnew, solve_work, new a, new j */
404   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
405   b->maxnz = b->nz = ainew[n] ;
406 
407   (*B)->factor                 =  FACTOR_LU;
408   (*B)->info.factor_mallocs    = reallocs;
409   (*B)->info.fill_ratio_given  = f;
410   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
411   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
412 
413   if (ai[n] != 0) {
414     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
415   } else {
416     (*B)->info.fill_ratio_needed = 0.0;
417   }
418   PetscFunctionReturn(0);
419 }
420 /* ----------------------------------------------------------- */
421 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
422 
423 #include "src/ksp/pc/impls/factor/factor.h"
424 #undef __FUNCT__
425 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
426 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
427 {
428   Mat            C=*B;
429   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
430   IS             isrow = b->row,isicol = b->icol;
431   PetscErrorCode ierr;
432   PetscInt       *r,*ic,i,j,n=A->m,*ai=b->i,*aj=b->j;
433   PetscInt       *ajtmpold,*ajtmp,nz,row,*ics;
434   PetscInt       *diag_offset = b->diag,diag,*pj;
435   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
436   PetscReal      zeropivot,rs,d,shift_nonzero;
437   PetscReal      row_shift,shift_top=0.;
438   PetscTruth     shift_pd;
439   Shift_Ctx      sctx;
440   PetscInt       newshift;
441 
442   PetscFunctionBegin;
443   shift_nonzero  = info->shiftnz;
444   shift_pd       = info->shiftpd;
445   zeropivot      = info->zeropivot;
446 
447   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
448   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
449   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
450   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
451   rtmps = rtmp; ics = ic;
452 
453   if (!a->diag) {
454     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
455   }
456   /* if both shift schemes are chosen by user, only use shift_pd */
457   if (shift_pd && shift_nonzero) shift_nonzero = 0.0;
458   if (shift_pd) { /* set shift_top=max{row_shift} */
459     PetscInt *aai = a->i,*ddiag = a->diag;
460     shift_top = 0;
461     for (i=0; i<n; i++) {
462       d = PetscAbsScalar((a->a)[ddiag[i]]);
463       /* calculate amt of shift needed for this row */
464       if (d<=0) {
465         row_shift = 0;
466       } else {
467         row_shift = -2*d;
468       }
469       v  = a->a+aai[i];
470       nz = aai[i+1] - aai[i];
471       for (j=0; j<nz; j++)
472 	row_shift += PetscAbsScalar(v[j]);
473       if (row_shift>shift_top) shift_top = row_shift;
474     }
475   }
476 
477   sctx.shift_amount = 0;
478   sctx.shift_top    = shift_top;
479   sctx.nshift       = 0;
480   sctx.nshift_max   = 5;
481   sctx.shift_lo     = 0.;
482   sctx.shift_hi     = 1.;
483   do {
484     sctx.lushift = PETSC_FALSE;
485     for (i=0; i<n; i++){
486       nz    = ai[i+1] - ai[i];
487       ajtmp = aj + ai[i];
488       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
489 
490       /* load in initial (unfactored row) */
491       nz       = a->i[r[i]+1] - a->i[r[i]];
492       ajtmpold = a->j + a->i[r[i]];
493       v        = a->a + a->i[r[i]];
494       for (j=0; j<nz; j++) {
495         rtmp[ics[ajtmpold[j]]] = v[j];
496       }
497       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
498 
499       row = *ajtmp++;
500       while  (row < i) {
501         pc = rtmp + row;
502         if (*pc != 0.0) {
503           pv         = b->a + diag_offset[row];
504           pj         = b->j + diag_offset[row] + 1;
505           multiplier = *pc / *pv++;
506           *pc        = multiplier;
507           nz         = ai[row+1] - diag_offset[row] - 1;
508           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
509           PetscLogFlops(2*nz);
510         }
511         row = *ajtmp++;
512       }
513       /* finished row so stick it into b->a */
514       pv   = b->a + ai[i] ;
515       pj   = b->j + ai[i] ;
516       nz   = ai[i+1] - ai[i];
517       diag = diag_offset[i] - ai[i];
518       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
519       rs   = 0.0;
520       for (j=0; j<nz; j++) {
521         pv[j] = rtmps[pj[j]];
522         if (j != diag) rs += PetscAbsScalar(pv[j]);
523       }
524 
525       sctx.rs  = rs;
526       sctx.pv  = pv[diag];
527       newshift = 0;
528       ierr = PCLUFactorCheckShift(A,info,B,&sctx,&newshift);CHKERRQ(ierr);
529       if (newshift == 1){
530         break;
531       } else if (newshift == -1){
532         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
533       }
534     }
535 
536     if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
537       /*
538        * if no shift in this attempt & shifting & started shifting & can refine,
539        * then try lower shift
540        */
541       sctx.shift_hi       = info->shift_fraction;
542       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
543       sctx.shift_amount   = info->shift_fraction * sctx.shift_top;
544       sctx.lushift        = PETSC_TRUE;
545       sctx.nshift++;
546     }
547   } while (sctx.lushift);
548 
549   /* invert diagonal entries for simplier triangular solves */
550   for (i=0; i<n; i++) {
551     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
552   }
553 
554   ierr = PetscFree(rtmp);CHKERRQ(ierr);
555   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
556   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
557   C->factor = FACTOR_LU;
558   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
559   C->assembled = PETSC_TRUE;
560   PetscLogFlops(C->n);
561   if (sctx.nshift){
562     if (shift_nonzero) {
563       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
564     } else if (shift_pd) {
565       b->lu_shift_fraction = info->shift_fraction;
566       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift);
567     }
568   }
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
574 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
575 {
576   PetscFunctionBegin;
577   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
578   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
579   PetscFunctionReturn(0);
580 }
581 
582 
583 /* ----------------------------------------------------------- */
584 #undef __FUNCT__
585 #define __FUNCT__ "MatLUFactor_SeqAIJ"
586 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
587 {
588   PetscErrorCode ierr;
589   Mat            C;
590 
591   PetscFunctionBegin;
592   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
593   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
594   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
595   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
596   PetscFunctionReturn(0);
597 }
598 /* ----------------------------------------------------------- */
599 #undef __FUNCT__
600 #define __FUNCT__ "MatSolve_SeqAIJ"
601 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
602 {
603   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
604   IS             iscol = a->col,isrow = a->row;
605   PetscErrorCode ierr;
606   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
607   PetscInt       nz,*rout,*cout;
608   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
609 
610   PetscFunctionBegin;
611   if (!n) PetscFunctionReturn(0);
612 
613   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
614   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
615   tmp  = a->solve_work;
616 
617   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
618   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
619 
620   /* forward solve the lower triangular */
621   tmp[0] = b[*r++];
622   tmps   = tmp;
623   for (i=1; i<n; i++) {
624     v   = aa + ai[i] ;
625     vi  = aj + ai[i] ;
626     nz  = a->diag[i] - ai[i];
627     sum = b[*r++];
628     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
629     tmp[i] = sum;
630   }
631 
632   /* backward solve the upper triangular */
633   for (i=n-1; i>=0; i--){
634     v   = aa + a->diag[i] + 1;
635     vi  = aj + a->diag[i] + 1;
636     nz  = ai[i+1] - a->diag[i] - 1;
637     sum = tmp[i];
638     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
639     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
640   }
641 
642   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
643   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
644   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
645   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
646   PetscLogFlops(2*a->nz - A->n);
647   PetscFunctionReturn(0);
648 }
649 
650 /* ----------------------------------------------------------- */
651 #undef __FUNCT__
652 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
653 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
654 {
655   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
656   PetscErrorCode ierr;
657   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
658   PetscScalar    *x,*b,*aa = a->a;
659 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
660   PetscInt       adiag_i,i,*vi,nz,ai_i;
661   PetscScalar    *v,sum;
662 #endif
663 
664   PetscFunctionBegin;
665   if (!n) PetscFunctionReturn(0);
666 
667   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
668   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
669 
670 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
671   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
672 #else
673   /* forward solve the lower triangular */
674   x[0] = b[0];
675   for (i=1; i<n; i++) {
676     ai_i = ai[i];
677     v    = aa + ai_i;
678     vi   = aj + ai_i;
679     nz   = adiag[i] - ai_i;
680     sum  = b[i];
681     while (nz--) sum -= *v++ * x[*vi++];
682     x[i] = sum;
683   }
684 
685   /* backward solve the upper triangular */
686   for (i=n-1; i>=0; i--){
687     adiag_i = adiag[i];
688     v       = aa + adiag_i + 1;
689     vi      = aj + adiag_i + 1;
690     nz      = ai[i+1] - adiag_i - 1;
691     sum     = x[i];
692     while (nz--) sum -= *v++ * x[*vi++];
693     x[i]    = sum*aa[adiag_i];
694   }
695 #endif
696   PetscLogFlops(2*a->nz - A->n);
697   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
698   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
699   PetscFunctionReturn(0);
700 }
701 
702 #undef __FUNCT__
703 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
704 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
705 {
706   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
707   IS             iscol = a->col,isrow = a->row;
708   PetscErrorCode ierr;
709   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
710   PetscInt       nz,*rout,*cout;
711   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
712 
713   PetscFunctionBegin;
714   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
715 
716   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
717   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
718   tmp  = a->solve_work;
719 
720   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
721   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
722 
723   /* forward solve the lower triangular */
724   tmp[0] = b[*r++];
725   for (i=1; i<n; i++) {
726     v   = aa + ai[i] ;
727     vi  = aj + ai[i] ;
728     nz  = a->diag[i] - ai[i];
729     sum = b[*r++];
730     while (nz--) sum -= *v++ * tmp[*vi++ ];
731     tmp[i] = sum;
732   }
733 
734   /* backward solve the upper triangular */
735   for (i=n-1; i>=0; i--){
736     v   = aa + a->diag[i] + 1;
737     vi  = aj + a->diag[i] + 1;
738     nz  = ai[i+1] - a->diag[i] - 1;
739     sum = tmp[i];
740     while (nz--) sum -= *v++ * tmp[*vi++ ];
741     tmp[i] = sum*aa[a->diag[i]];
742     x[*c--] += tmp[i];
743   }
744 
745   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
746   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
747   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
748   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
749   PetscLogFlops(2*a->nz);
750 
751   PetscFunctionReturn(0);
752 }
753 /* -------------------------------------------------------------------*/
754 #undef __FUNCT__
755 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
756 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
757 {
758   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
759   IS             iscol = a->col,isrow = a->row;
760   PetscErrorCode ierr;
761   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
762   PetscInt       nz,*rout,*cout,*diag = a->diag;
763   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
764 
765   PetscFunctionBegin;
766   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
767   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
768   tmp  = a->solve_work;
769 
770   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
771   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
772 
773   /* copy the b into temp work space according to permutation */
774   for (i=0; i<n; i++) tmp[i] = b[c[i]];
775 
776   /* forward solve the U^T */
777   for (i=0; i<n; i++) {
778     v   = aa + diag[i] ;
779     vi  = aj + diag[i] + 1;
780     nz  = ai[i+1] - diag[i] - 1;
781     s1  = tmp[i];
782     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
783     while (nz--) {
784       tmp[*vi++ ] -= (*v++)*s1;
785     }
786     tmp[i] = s1;
787   }
788 
789   /* backward solve the L^T */
790   for (i=n-1; i>=0; i--){
791     v   = aa + diag[i] - 1 ;
792     vi  = aj + diag[i] - 1 ;
793     nz  = diag[i] - ai[i];
794     s1  = tmp[i];
795     while (nz--) {
796       tmp[*vi-- ] -= (*v--)*s1;
797     }
798   }
799 
800   /* copy tmp into x according to permutation */
801   for (i=0; i<n; i++) x[r[i]] = tmp[i];
802 
803   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
804   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
805   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
806   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
807 
808   PetscLogFlops(2*a->nz-A->n);
809   PetscFunctionReturn(0);
810 }
811 
812 #undef __FUNCT__
813 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
814 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
815 {
816   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
817   IS             iscol = a->col,isrow = a->row;
818   PetscErrorCode ierr;
819   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
820   PetscInt       nz,*rout,*cout,*diag = a->diag;
821   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
822 
823   PetscFunctionBegin;
824   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
825 
826   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
827   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
828   tmp = a->solve_work;
829 
830   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
831   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
832 
833   /* copy the b into temp work space according to permutation */
834   for (i=0; i<n; i++) tmp[i] = b[c[i]];
835 
836   /* forward solve the U^T */
837   for (i=0; i<n; i++) {
838     v   = aa + diag[i] ;
839     vi  = aj + diag[i] + 1;
840     nz  = ai[i+1] - diag[i] - 1;
841     tmp[i] *= *v++;
842     while (nz--) {
843       tmp[*vi++ ] -= (*v++)*tmp[i];
844     }
845   }
846 
847   /* backward solve the L^T */
848   for (i=n-1; i>=0; i--){
849     v   = aa + diag[i] - 1 ;
850     vi  = aj + diag[i] - 1 ;
851     nz  = diag[i] - ai[i];
852     while (nz--) {
853       tmp[*vi-- ] -= (*v--)*tmp[i];
854     }
855   }
856 
857   /* copy tmp into x according to permutation */
858   for (i=0; i<n; i++) x[r[i]] += tmp[i];
859 
860   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
861   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
862   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
863   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
864 
865   PetscLogFlops(2*a->nz);
866   PetscFunctionReturn(0);
867 }
868 /* ----------------------------------------------------------------*/
869 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
870 
871 #undef __FUNCT__
872 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
873 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
874 {
875   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
876   IS             isicol;
877   PetscErrorCode ierr;
878   PetscInt       *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
879   PetscInt       *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
880   PetscInt       *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
881   PetscInt       incrlev,nnz,i,levels,diagonal_fill;
882   PetscTruth     col_identity,row_identity;
883   PetscReal      f;
884 
885   PetscFunctionBegin;
886   f             = info->fill;
887   levels        = (PetscInt)info->levels;
888   diagonal_fill = (PetscInt)info->diagonal_fill;
889   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
890 
891   /* special case that simply copies fill pattern */
892   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
893   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
894   if (!levels && row_identity && col_identity) {
895     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
896     (*fact)->factor = FACTOR_LU;
897     b               = (Mat_SeqAIJ*)(*fact)->data;
898     if (!b->diag) {
899       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
900     }
901     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
902     b->row              = isrow;
903     b->col              = iscol;
904     b->icol             = isicol;
905     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
906     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
907     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
908     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
909     PetscFunctionReturn(0);
910   }
911 
912   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
913   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
914 
915   /* get new row pointers */
916   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
917   ainew[0] = 0;
918   /* don't know how many column pointers are needed so estimate */
919   jmax = (PetscInt)(f*(ai[n]+1));
920   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
921   /* ajfill is level of fill for each fill entry */
922   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr);
923   /* fill is a linked list of nonzeros in active row */
924   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
925   /* im is level for each filled value */
926   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
927   /* dloc is location of diagonal in factor */
928   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr);
929   dloc[0]  = 0;
930   for (prow=0; prow<n; prow++) {
931 
932     /* copy current row into linked list */
933     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
934     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
935     xi      = aj + ai[r[prow]];
936     fill[n] = n;
937     fill[prow] = -1; /* marker to indicate if diagonal exists */
938     while (nz--) {
939       fm  = n;
940       idx = ic[*xi++ ];
941       do {
942         m  = fm;
943         fm = fill[m];
944       } while (fm < idx);
945       fill[m]   = idx;
946       fill[idx] = fm;
947       im[idx]   = 0;
948     }
949 
950     /* make sure diagonal entry is included */
951     if (diagonal_fill && fill[prow] == -1) {
952       fm = n;
953       while (fill[fm] < prow) fm = fill[fm];
954       fill[prow] = fill[fm]; /* insert diagonal into linked list */
955       fill[fm]   = prow;
956       im[prow]   = 0;
957       nzf++;
958       dcount++;
959     }
960 
961     nzi = 0;
962     row = fill[n];
963     while (row < prow) {
964       incrlev = im[row] + 1;
965       nz      = dloc[row];
966       xi      = ajnew  + ainew[row]  + nz + 1;
967       flev    = ajfill + ainew[row]  + nz + 1;
968       nnz     = ainew[row+1] - ainew[row] - nz - 1;
969       fm      = row;
970       while (nnz-- > 0) {
971         idx = *xi++ ;
972         if (*flev + incrlev > levels) {
973           flev++;
974           continue;
975         }
976         do {
977           m  = fm;
978           fm = fill[m];
979         } while (fm < idx);
980         if (fm != idx) {
981           im[idx]   = *flev + incrlev;
982           fill[m]   = idx;
983           fill[idx] = fm;
984           fm        = idx;
985           nzf++;
986         } else {
987           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
988         }
989         flev++;
990       }
991       row = fill[row];
992       nzi++;
993     }
994     /* copy new filled row into permanent storage */
995     ainew[prow+1] = ainew[prow] + nzf;
996     if (ainew[prow+1] > jmax) {
997 
998       /* estimate how much additional space we will need */
999       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1000       /* just double the memory each time */
1001       /*  maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1002       PetscInt maxadd = jmax;
1003       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1004       jmax += maxadd;
1005 
1006       /* allocate a longer ajnew and ajfill */
1007       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1008       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1009       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1010       ajnew  = xi;
1011       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1012       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1013       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1014       ajfill = xi;
1015       reallocs++; /* count how many times we realloc */
1016     }
1017     xi          = ajnew + ainew[prow] ;
1018     flev        = ajfill + ainew[prow] ;
1019     dloc[prow]  = nzi;
1020     fm          = fill[n];
1021     while (nzf--) {
1022       *xi++   = fm ;
1023       *flev++ = im[fm];
1024       fm      = fill[fm];
1025     }
1026     /* make sure row has diagonal entry */
1027     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1028       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1029     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1030     }
1031   }
1032   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1033   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1034   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1035   ierr = PetscFree(fill);CHKERRQ(ierr);
1036   ierr = PetscFree(im);CHKERRQ(ierr);
1037 
1038   {
1039     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1040     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1041     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1042     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1043     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1044     if (diagonal_fill) {
1045       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1046     }
1047   }
1048 
1049   /* put together the new matrix */
1050   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1051   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1052   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1053   PetscLogObjectParent(*fact,isicol);
1054   b = (Mat_SeqAIJ*)(*fact)->data;
1055   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1056   b->singlemalloc = PETSC_FALSE;
1057   len = (ainew[n] )*sizeof(PetscScalar);
1058   /* the next line frees the default space generated by the Create() */
1059   ierr = PetscFree(b->a);CHKERRQ(ierr);
1060   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1061   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1062   b->j          = ajnew;
1063   b->i          = ainew;
1064   for (i=0; i<n; i++) dloc[i] += ainew[i];
1065   b->diag       = dloc;
1066   b->ilen       = 0;
1067   b->imax       = 0;
1068   b->row        = isrow;
1069   b->col        = iscol;
1070   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1071   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1072   b->icol       = isicol;
1073   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1074   /* In b structure:  Free imax, ilen, old a, old j.
1075      Allocate dloc, solve_work, new a, new j */
1076   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1077   b->maxnz             = b->nz = ainew[n] ;
1078   (*fact)->factor = FACTOR_LU;
1079   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1080   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1081   (*fact)->info.factor_mallocs    = reallocs;
1082   (*fact)->info.fill_ratio_given  = f;
1083   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #include "src/mat/impls/sbaij/seq/sbaij.h"
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1090 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1091 {
1092   Mat            C = *B;
1093   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1094   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1095   IS             ip=b->row;
1096   PetscErrorCode ierr;
1097   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1098   PetscInt       *ai=a->i,*aj=a->j;
1099   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1100   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1101   PetscReal      zeropivot,shift_amount,rs,shift_nonzero;
1102   PetscTruth     chshift,shift_pd;
1103   PetscInt       nshift=0;
1104 
1105   PetscFunctionBegin;
1106   shift_nonzero  = info->shiftnz;
1107   shift_pd       = info->shiftpd;
1108   zeropivot      = info->zeropivot;
1109 
1110   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1111 
1112   /* initialization */
1113   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1114   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1115   jl   = il + mbs;
1116   rtmp = (MatScalar*)(jl + mbs);
1117 
1118   shift_amount = 0;
1119   do {
1120     chshift = PETSC_FALSE;
1121     for (i=0; i<mbs; i++) {
1122       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1123     }
1124 
1125     for (k = 0; k<mbs; k++){
1126       bval = ba + bi[k];
1127       /* initialize k-th row by the perm[k]-th row of A */
1128       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1129       for (j = jmin; j < jmax; j++){
1130         col = rip[aj[j]];
1131         if (col >= k){ /* only take upper triangular entry */
1132           rtmp[col] = aa[j];
1133           *bval++  = 0.0; /* for in-place factorization */
1134         }
1135       }
1136       /* shift the diagonal of the matrix */
1137       if (nshift) rtmp[k] += shift_amount;
1138 
1139       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1140       dk = rtmp[k];
1141       i = jl[k]; /* first row to be added to k_th row  */
1142 
1143       while (i < k){
1144         nexti = jl[i]; /* next row to be added to k_th row */
1145 
1146         /* compute multiplier, update diag(k) and U(i,k) */
1147         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1148         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1149         dk += uikdi*ba[ili];
1150         ba[ili] = uikdi; /* -U(i,k) */
1151 
1152         /* add multiple of row i to k-th row */
1153         jmin = ili + 1; jmax = bi[i+1];
1154         if (jmin < jmax){
1155           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1156           /* update il and jl for row i */
1157           il[i] = jmin;
1158           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1159         }
1160         i = nexti;
1161       }
1162 
1163       /* shift the diagonals when zero pivot is detected */
1164       /* compute rs=sum of abs(off-diagonal) */
1165       rs   = 0.0;
1166       jmin = bi[k]+1;
1167       nz   = bi[k+1] - jmin;
1168       if (nz){
1169         bcol = bj + jmin;
1170         while (nz--){
1171           rs += PetscAbsScalar(rtmp[*bcol++]);
1172         }
1173       }
1174       if (PetscAbsScalar(dk) <= zeropivot*rs && shift_nonzero){
1175         /* force |diag(*B)| > zeropivot*rs */
1176         if (!nshift){
1177           shift_amount = shift_nonzero;
1178         } else {
1179           shift_amount *= 2.0;
1180         }
1181         chshift = PETSC_TRUE;
1182         nshift++;
1183         break;
1184       } else if (PetscRealPart(dk) <= zeropivot*rs && shift_pd){
1185         /* calculate a shift that would make this row diagonally dominant */
1186 	shift_amount = PetscMax(rs+PetscAbs(PetscRealPart(dk)),1.1*shift_amount);
1187 	chshift      = PETSC_TRUE;
1188 	/* Unlike in the ILU case there is no exit condition on nshift:
1189 	   we increase the shift until it converges. There is no guarantee that
1190 	   this algorithm converges faster or slower, or is better or worse
1191 	   than the ILU algorithm. */
1192 	nshift++;
1193 	break;
1194       } else if (PetscAbsScalar(dk) <= zeropivot*rs){
1195         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1196       }
1197 
1198       /* copy data into U(k,:) */
1199       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1200       jmin = bi[k]+1; jmax = bi[k+1];
1201       if (jmin < jmax) {
1202         for (j=jmin; j<jmax; j++){
1203           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1204         }
1205         /* add the k-th row into il and jl */
1206         il[k] = jmin;
1207         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1208       }
1209     }
1210   } while (chshift);
1211   ierr = PetscFree(il);CHKERRQ(ierr);
1212 
1213   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1214   C->factor       = FACTOR_CHOLESKY;
1215   C->assembled    = PETSC_TRUE;
1216   C->preallocated = PETSC_TRUE;
1217   PetscLogFlops(C->m);
1218   if (nshift){
1219     if (shift_nonzero) {
1220       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount);
1221     } else if (shift_pd) {
1222       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g\n",nshift,shift_amount);
1223     }
1224   }
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 #include "petscbt.h"
1229 #include "src/mat/utils/freespace.h"
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1232 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1233 {
1234   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1235   Mat_SeqSBAIJ   *b;
1236   Mat            B;
1237   PetscErrorCode ierr;
1238   PetscTruth     perm_identity;
1239   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1240   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1241   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1242   PetscInt       ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1243   PetscReal      fill=info->fill,levels=info->levels;
1244   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1245   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1246   PetscBT        lnkbt;
1247 
1248   PetscFunctionBegin;
1249   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1250   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1251 
1252   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1253   ui[0] = 0;
1254 
1255   /* special case that simply copies fill pattern */
1256   if (!levels && perm_identity) {
1257     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1258     for (i=0; i<am; i++) {
1259       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1260     }
1261     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1262     cols = uj;
1263     for (i=0; i<am; i++) {
1264       aj    = a->j + a->diag[i];
1265       ncols = ui[i+1] - ui[i];
1266       for (j=0; j<ncols; j++) *cols++ = *aj++;
1267     }
1268   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1269     /* initialization */
1270     ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
1271 
1272     /* jl: linked list for storing indices of the pivot rows
1273        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1274     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1275     il         = jl + am;
1276     uj_ptr     = (PetscInt**)(il + am);
1277     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1278     for (i=0; i<am; i++){
1279       jl[i] = am; il[i] = 0;
1280     }
1281 
1282     /* create and initialize a linked list for storing column indices of the active row k */
1283     nlnk = am + 1;
1284     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1285 
1286     /* initial FreeSpace size is fill*(ai[am]+1) */
1287     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1288     current_space = free_space;
1289     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1290     current_space_lvl = free_space_lvl;
1291 
1292     for (k=0; k<am; k++){  /* for each active row k */
1293       /* initialize lnk by the column indices of row rip[k] of A */
1294       nzk   = 0;
1295       ncols = ai[rip[k]+1] - ai[rip[k]];
1296       ncols_upper = 0;
1297       cols        = cols_lvl + am;
1298       for (j=0; j<ncols; j++){
1299         i = rip[*(aj + ai[rip[k]] + j)];
1300         if (i >= k){ /* only take upper triangular entry */
1301           cols[ncols_upper] = i;
1302           cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
1303           ncols_upper++;
1304         }
1305       }
1306       ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1307       nzk += nlnk;
1308 
1309       /* update lnk by computing fill-in for each pivot row to be merged in */
1310       prow = jl[k]; /* 1st pivot row */
1311 
1312       while (prow < k){
1313         nextprow = jl[prow];
1314 
1315         /* merge prow into k-th row */
1316         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1317         jmax = ui[prow+1];
1318         ncols = jmax-jmin;
1319         i     = jmin - ui[prow];
1320         cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1321         for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1322         ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1323         nzk += nlnk;
1324 
1325         /* update il and jl for prow */
1326         if (jmin < jmax){
1327           il[prow] = jmin;
1328           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1329         }
1330         prow = nextprow;
1331       }
1332 
1333       /* if free space is not available, make more free space */
1334       if (current_space->local_remaining<nzk) {
1335         i = am - k + 1; /* num of unfactored rows */
1336         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1337         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1338         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1339         reallocs++;
1340       }
1341 
1342       /* copy data into free_space and free_space_lvl, then initialize lnk */
1343       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1344 
1345       /* add the k-th row into il and jl */
1346       if (nzk > 1){
1347         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1348         jl[k] = jl[i]; jl[i] = k;
1349         il[k] = ui[k] + 1;
1350       }
1351       uj_ptr[k]     = current_space->array;
1352       uj_lvl_ptr[k] = current_space_lvl->array;
1353 
1354       current_space->array           += nzk;
1355       current_space->local_used      += nzk;
1356       current_space->local_remaining -= nzk;
1357 
1358       current_space_lvl->array           += nzk;
1359       current_space_lvl->local_used      += nzk;
1360       current_space_lvl->local_remaining -= nzk;
1361 
1362       ui[k+1] = ui[k] + nzk;
1363     }
1364 
1365     if (ai[am] != 0) {
1366       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1367       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1368       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1369       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1370     } else {
1371       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1372     }
1373 
1374     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1375     ierr = PetscFree(jl);CHKERRQ(ierr);
1376     ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
1377 
1378     /* destroy list of free space and other temporary array(s) */
1379     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1380     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1381     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1382     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1383 
1384   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1385 
1386   /* put together the new matrix in MATSEQSBAIJ format */
1387   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1388   B = *fact;
1389   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1390   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1391 
1392   b    = (Mat_SeqSBAIJ*)B->data;
1393   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1394   b->singlemalloc = PETSC_FALSE;
1395   /* the next line frees the default space generated by the Create() */
1396   ierr = PetscFree(b->a);CHKERRQ(ierr);
1397   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1398   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1399   b->j    = uj;
1400   b->i    = ui;
1401   b->diag = 0;
1402   b->ilen = 0;
1403   b->imax = 0;
1404   b->row  = perm;
1405   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1406   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1407   b->icol = perm;
1408   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1409   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1410   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1411   b->maxnz = b->nz = ui[am];
1412 
1413   B->factor                 = FACTOR_CHOLESKY;
1414   B->info.factor_mallocs    = reallocs;
1415   B->info.fill_ratio_given  = fill;
1416   if (ai[am] != 0) {
1417     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1418   } else {
1419     B->info.fill_ratio_needed = 0.0;
1420   }
1421   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1422   if (perm_identity){
1423     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1424     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1425   }
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1431 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1432 {
1433   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1434   Mat_SeqSBAIJ   *b;
1435   Mat            B;
1436   PetscErrorCode ierr;
1437   PetscTruth     perm_identity;
1438   PetscReal      fill = info->fill;
1439   PetscInt       *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1440   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1441   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1442   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1443   PetscBT        lnkbt;
1444   IS             iperm;
1445 
1446   PetscFunctionBegin;
1447   /* check whether perm is the identity mapping */
1448   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1449   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1450 
1451   if (!perm_identity){
1452     /* check if perm is symmetric! */
1453     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1454     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1455     for (i=0; i<mbs; i++) {
1456       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1457     }
1458     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1459     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1460   }
1461 
1462   /* initialization */
1463   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1464   ui[0] = 0;
1465 
1466   /* jl: linked list for storing indices of the pivot rows
1467      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
1468   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1469   il     = jl + mbs;
1470   cols   = il + mbs;
1471   ui_ptr = (PetscInt**)(cols + mbs);
1472   for (i=0; i<mbs; i++){
1473     jl[i] = mbs; il[i] = 0;
1474   }
1475 
1476   /* create and initialize a linked list for storing column indices of the active row k */
1477   nlnk = mbs + 1;
1478   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1479 
1480   /* initial FreeSpace size is fill*(ai[mbs]+1) */
1481   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
1482   current_space = free_space;
1483 
1484   for (k=0; k<mbs; k++){  /* for each active row k */
1485     /* initialize lnk by the column indices of row rip[k] of A */
1486     nzk   = 0;
1487     ncols = ai[rip[k]+1] - ai[rip[k]];
1488     ncols_upper = 0;
1489     for (j=0; j<ncols; j++){
1490       i = rip[*(aj + ai[rip[k]] + j)];
1491       if (i >= k){ /* only take upper triangular entry */
1492         cols[ncols_upper] = i;
1493         ncols_upper++;
1494       }
1495     }
1496     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1497     nzk += nlnk;
1498 
1499     /* update lnk by computing fill-in for each pivot row to be merged in */
1500     prow = jl[k]; /* 1st pivot row */
1501 
1502     while (prow < k){
1503       nextprow = jl[prow];
1504       /* merge prow into k-th row */
1505       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
1506       jmax = ui[prow+1];
1507       ncols = jmax-jmin;
1508       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
1509       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1510       nzk += nlnk;
1511 
1512       /* update il and jl for prow */
1513       if (jmin < jmax){
1514         il[prow] = jmin;
1515         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1516       }
1517       prow = nextprow;
1518     }
1519 
1520     /* if free space is not available, make more free space */
1521     if (current_space->local_remaining<nzk) {
1522       i = mbs - k + 1; /* num of unfactored rows */
1523       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1524       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1525       reallocs++;
1526     }
1527 
1528     /* copy data into free space, then initialize lnk */
1529     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1530 
1531     /* add the k-th row into il and jl */
1532     if (nzk-1 > 0){
1533       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1534       jl[k] = jl[i]; jl[i] = k;
1535       il[k] = ui[k] + 1;
1536     }
1537     ui_ptr[k] = current_space->array;
1538     current_space->array           += nzk;
1539     current_space->local_used      += nzk;
1540     current_space->local_remaining -= nzk;
1541 
1542     ui[k+1] = ui[k] + nzk;
1543   }
1544 
1545   if (ai[mbs] != 0) {
1546     PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]);
1547     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1548     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1549     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1550   } else {
1551      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1552   }
1553 
1554   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1555   ierr = PetscFree(jl);CHKERRQ(ierr);
1556 
1557   /* destroy list of free space and other temporary array(s) */
1558   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1559   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1560   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1561 
1562   /* put together the new matrix in MATSEQSBAIJ format */
1563   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
1564   B    = *fact;
1565   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1566   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1567 
1568   b = (Mat_SeqSBAIJ*)B->data;
1569   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1570   b->singlemalloc = PETSC_FALSE;
1571   /* the next line frees the default space generated by the Create() */
1572   ierr = PetscFree(b->a);CHKERRQ(ierr);
1573   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1574   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1575   b->j    = uj;
1576   b->i    = ui;
1577   b->diag = 0;
1578   b->ilen = 0;
1579   b->imax = 0;
1580   b->row  = perm;
1581   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1582   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1583   b->icol = perm;
1584   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1585   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1586   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
1587   b->maxnz = b->nz = ui[mbs];
1588 
1589   B->factor                 = FACTOR_CHOLESKY;
1590   B->info.factor_mallocs    = reallocs;
1591   B->info.fill_ratio_given  = fill;
1592   if (ai[mbs] != 0) {
1593     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
1594   } else {
1595     B->info.fill_ratio_needed = 0.0;
1596   }
1597   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1598   if (perm_identity){
1599     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1600     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1601   }
1602   PetscFunctionReturn(0);
1603 }
1604