xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision b3bf805bc5fa6ada2d57fa222e3d33ae005e5770)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9 {
10   PetscFunctionBegin;
11 
12   SETERRQ(PETSC_ERR_SUP,"Code not written");
13 #if !defined(PETSC_USE_DEBUG)
14   PetscFunctionReturn(0);
15 #endif
16 }
17 
18 
19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
21 
22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
28   /* ------------------------------------------------------------
29 
30           This interface was contribed by Tony Caola
31 
32      This routine is an interface to the pivoting drop-tolerance
33      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34      SPARSEKIT2.
35 
36      The SPARSEKIT2 routines used here are covered by the GNU
37      copyright; see the file gnu in this directory.
38 
39      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40      help in getting this routine ironed out.
41 
42      The major drawback to this routine is that if info->fill is
43      not large enough it fails rather than allocating more space;
44      this can be fixed by hacking/improving the f2c version of
45      Yousef Saad's code.
46 
47      ------------------------------------------------------------
48 */
49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50 {
51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
52   PetscFunctionBegin;
53   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
54   You can obtain the drop tolerance routines by installing PETSc from\n\
55   www.mcs.anl.gov/petsc\n");
56 #else
57   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
58   IS             iscolf,isicol,isirow;
59   PetscTruth     reorder;
60   PetscErrorCode ierr,sierr;
61   PetscInt       *c,*r,*ic,i,n = A->m;
62   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63   PetscInt       *ordcol,*iwk,*iperm,*jw;
64   PetscInt       jmax,lfill,job,*o_i,*o_j;
65   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66   PetscReal      af;
67 
68   PetscFunctionBegin;
69 
70   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
71   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
72   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
73   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
74   lfill   = (PetscInt)(info->dtcount/2.0);
75   jmax    = (PetscInt)(info->fill*a->nz);
76 
77 
78   /* ------------------------------------------------------------
79      If reorder=.TRUE., then the original matrix has to be
80      reordered to reflect the user selected ordering scheme, and
81      then de-reordered so it is in it's original format.
82      Because Saad's dperm() is NOT in place, we have to copy
83      the original matrix and allocate more storage. . .
84      ------------------------------------------------------------
85   */
86 
87   /* set reorder to true if either isrow or iscol is not identity */
88   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
89   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
90   reorder = PetscNot(reorder);
91 
92 
93   /* storage for ilu factor */
94   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
95   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
97   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
98 
99   /* ------------------------------------------------------------
100      Make sure that everything is Fortran formatted (1-Based)
101      ------------------------------------------------------------
102   */
103   for (i=old_i[0];i<old_i[n];i++) {
104     old_j[i]++;
105   }
106   for(i=0;i<n+1;i++) {
107     old_i[i]++;
108   };
109 
110 
111   if (reorder) {
112     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
113     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
114     for(i=0;i<n;i++) {
115       r[i]  = r[i]+1;
116       c[i]  = c[i]+1;
117     }
118     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
119     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
121     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
122     for (i=0;i<n;i++) {
123       r[i]  = r[i]-1;
124       c[i]  = c[i]-1;
125     }
126     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
127     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
128     o_a = old_a2;
129     o_j = old_j2;
130     o_i = old_i2;
131   } else {
132     o_a = old_a;
133     o_j = old_j;
134     o_i = old_i;
135   }
136 
137   /* ------------------------------------------------------------
138      Call Saad's ilutp() routine to generate the factorization
139      ------------------------------------------------------------
140   */
141 
142   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
144   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
145 
146   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
147   if (sierr) {
148     switch (sierr) {
149       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
150       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
152       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
153       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
154       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
155     }
156   }
157 
158   ierr = PetscFree(w);CHKERRQ(ierr);
159   ierr = PetscFree(jw);CHKERRQ(ierr);
160 
161   /* ------------------------------------------------------------
162      Saad's routine gives the result in Modified Sparse Row (msr)
163      Convert to Compressed Sparse Row format (csr)
164      ------------------------------------------------------------
165   */
166 
167   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
168   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
169 
170   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
171 
172   ierr = PetscFree(iwk);CHKERRQ(ierr);
173   ierr = PetscFree(wk);CHKERRQ(ierr);
174 
175   if (reorder) {
176     ierr = PetscFree(old_a2);CHKERRQ(ierr);
177     ierr = PetscFree(old_j2);CHKERRQ(ierr);
178     ierr = PetscFree(old_i2);CHKERRQ(ierr);
179   } else {
180     /* fix permutation of old_j that the factorization introduced */
181     for (i=old_i[0]; i<old_i[n]; i++) {
182       old_j[i-1] = iperm[old_j[i-1]-1];
183     }
184   }
185 
186   /* get rid of the shift to indices starting at 1 */
187   for (i=0; i<n+1; i++) {
188     old_i[i]--;
189   }
190   for (i=old_i[0];i<old_i[n];i++) {
191     old_j[i]--;
192   }
193 
194   /* Make the factored matrix 0-based */
195   for (i=0; i<n+1; i++) {
196     new_i[i]--;
197   }
198   for (i=new_i[0];i<new_i[n];i++) {
199     new_j[i]--;
200   }
201 
202   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203   /*-- permute the right-hand-side and solution vectors           --*/
204   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
205   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
206   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
207   for(i=0; i<n; i++) {
208     ordcol[i] = ic[iperm[i]-1];
209   };
210   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
211   ierr = ISDestroy(isicol);CHKERRQ(ierr);
212 
213   ierr = PetscFree(iperm);CHKERRQ(ierr);
214 
215   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
216   ierr = PetscFree(ordcol);CHKERRQ(ierr);
217 
218   /*----- put together the new matrix -----*/
219 
220   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
221   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
222   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
223   (*fact)->factor    = FACTOR_LU;
224   (*fact)->assembled = PETSC_TRUE;
225 
226   b = (Mat_SeqAIJ*)(*fact)->data;
227   ierr = PetscFree(b->imax);CHKERRQ(ierr);
228   b->sorted        = PETSC_FALSE;
229   b->singlemalloc  = PETSC_FALSE;
230   /* the next line frees the default space generated by the MatCreate() */
231   ierr             = PetscFree(b->a);CHKERRQ(ierr);
232   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
247 
248   /* check out for identical nodes. If found, use inode functions */
249   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
250 
251   af = ((double)b->nz)/((double)a->nz) + .001;
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 /*
262     Factorization code for AIJ format.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267 {
268   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
269   IS             isicol;
270   PetscErrorCode ierr;
271   PetscInt       *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
272   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
274   PetscReal      f;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
284   ainew[0] = 0;
285   /* don't know how many column pointers are needed so estimate */
286   f = info->fill;
287   jmax  = (PetscInt)(f*ai[n]+1);
288   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
289   /* fill is a linked list of nonzeros in active row */
290   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
291   im   = fill + n + 1;
292   /* idnew is location of diagonal in factor */
293   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
294   idnew[0] = 0;
295 
296   for (i=0; i<n; i++) {
297     /* first copy previous fill into linked list */
298     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
299     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300     ajtmp   = aj + ai[r[i]];
301     fill[n] = n;
302     while (nz--) {
303       fm  = n;
304       idx = ic[*ajtmp++];
305       do {
306         m  = fm;
307         fm = fill[m];
308       } while (fm < idx);
309       fill[m]   = idx;
310       fill[idx] = fm;
311     }
312     row = fill[n];
313     while (row < i) {
314       ajtmp = ajnew + idnew[row] + 1;
315       nzbd  = 1 + idnew[row] - ainew[row];
316       nz    = im[row] - nzbd;
317       fm    = row;
318       while (nz-- > 0) {
319         idx = *ajtmp++ ;
320         nzbd++;
321         if (idx == i) im[row] = nzbd;
322         do {
323           m  = fm;
324           fm = fill[m];
325         } while (fm < idx);
326         if (fm != idx) {
327           fill[m]   = idx;
328           fill[idx] = fm;
329           fm        = idx;
330           nnz++;
331         }
332       }
333       row = fill[row];
334     }
335     /* copy new filled row into permanent storage */
336     ainew[i+1] = ainew[i] + nnz;
337     if (ainew[i+1] > jmax) {
338       /* estimate how much additional space we will need */
339       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
340       /* just double the memory each time */
341       PetscInt maxadd = jmax;
342       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
343       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
344       jmax += maxadd;
345 
346       /* allocate a longer ajnew */
347       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
348       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr);
349       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
350       ajnew = ajtmp;
351       reallocs++; /* count how many times we realloc */
352     }
353     ajtmp = ajnew + ainew[i];
354     fm    = fill[n];
355     nzi   = 0;
356     im[i] = nnz;
357     while (nnz--) {
358       if (fm < i) nzi++;
359       *ajtmp++ = fm ;
360       fm       = fill[fm];
361     }
362     idnew[i] = ainew[i] + nzi;
363   }
364   if (ai[n] != 0) {
365     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
366     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
367     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
368     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
369     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
370   } else {
371     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
372   }
373 
374   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
375   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
376 
377   ierr = PetscFree(fill);CHKERRQ(ierr);
378 
379   /* put together the new matrix */
380   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
381   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
382   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
383   PetscLogObjectParent(*B,isicol);
384   b    = (Mat_SeqAIJ*)(*B)->data;
385   ierr = PetscFree(b->imax);CHKERRQ(ierr);
386   b->singlemalloc = PETSC_FALSE;
387   /* the next line frees the default space generated by the Create() */
388   ierr = PetscFree(b->a);CHKERRQ(ierr);
389   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
390   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
391   b->j          = ajnew;
392   b->i          = ainew;
393   b->diag       = idnew;
394   b->ilen       = 0;
395   b->imax       = 0;
396   b->row        = isrow;
397   b->col        = iscol;
398   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
399   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
400   b->icol       = isicol;
401   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
402   /* In b structure:  Free imax, ilen, old a, old j.
403      Allocate idnew, solve_work, new a, new j */
404   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
405   b->maxnz = b->nz = ainew[n] ;
406 
407   (*B)->factor                 =  FACTOR_LU;
408   (*B)->info.factor_mallocs    = reallocs;
409   (*B)->info.fill_ratio_given  = f;
410   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
411   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
412 
413   if (ai[n] != 0) {
414     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
415   } else {
416     (*B)->info.fill_ratio_needed = 0.0;
417   }
418   PetscFunctionReturn(0);
419 }
420 /* ----------------------------------------------------------- */
421 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
425 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
426 {
427   Mat            C=*B;
428   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
429   IS             isrow = b->row,isicol = b->icol;
430   PetscErrorCode ierr;
431   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
432   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
433   PetscInt       *diag_offset = b->diag,diag,*pj;
434   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
435   PetscReal      zeropivot,rs,d,shift_nonzero;
436   PetscReal      row_shift,shift_top=0.;
437   PetscTruth     shift_pd;
438   LUShift_Ctx    sctx;
439   PetscInt       newshift;
440 
441   PetscFunctionBegin;
442   shift_nonzero  = info->shiftnz;
443   shift_pd       = info->shiftpd;
444   zeropivot      = info->zeropivot;
445 
446   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
447   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
448   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
449   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
450   rtmps = rtmp; ics = ic;
451 
452   if (!a->diag) {
453     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
454   }
455   /* if both shift schemes are chosen by user, only use shift_pd */
456   if (shift_pd && shift_nonzero) shift_nonzero = 0.0;
457   if (shift_pd) { /* set shift_top=max{row_shift} */
458     PetscInt *aai = a->i,*ddiag = a->diag;
459     shift_top = 0;
460     for (i=0; i<n; i++) {
461       d = PetscAbsScalar((a->a)[ddiag[i]]);
462       /* calculate amt of shift needed for this row */
463       if (d<=0) {
464         row_shift = 0;
465       } else {
466         row_shift = -2*d;
467       }
468       v  = a->a+aai[i];
469       nz = aai[i+1] - aai[i];
470       for (j=0; j<nz; j++)
471 	row_shift += PetscAbsScalar(v[j]);
472       if (row_shift>shift_top) shift_top = row_shift;
473     }
474   }
475 
476   sctx.shift_amount = 0;
477   sctx.shift_top    = shift_top;
478   sctx.nshift       = 0;
479   sctx.nshift_max   = 5;
480   sctx.shift_lo     = 0.;
481   sctx.shift_hi     = 1.;
482   do {
483     sctx.lushift = PETSC_FALSE;
484     for (i=0; i<n; i++){
485       nz    = bi[i+1] - bi[i];
486       bjtmp = bj + bi[i];
487       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
488 
489       /* load in initial (unfactored row) */
490       nz    = a->i[r[i]+1] - a->i[r[i]];
491       ajtmp = a->j + a->i[r[i]];
492       v     = a->a + a->i[r[i]];
493       for (j=0; j<nz; j++) {
494         rtmp[ics[ajtmp[j]]] = v[j];
495       }
496       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
497 
498       row = *bjtmp++;
499       while  (row < i) {
500         pc = rtmp + row;
501         if (*pc != 0.0) {
502           pv         = b->a + diag_offset[row];
503           pj         = b->j + diag_offset[row] + 1;
504           multiplier = *pc / *pv++;
505           *pc        = multiplier;
506           nz         = bi[row+1] - diag_offset[row] - 1;
507           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
508           PetscLogFlops(2*nz);
509         }
510         row = *bjtmp++;
511       }
512       /* finished row so stick it into b->a */
513       pv   = b->a + bi[i] ;
514       pj   = b->j + bi[i] ;
515       nz   = bi[i+1] - bi[i];
516       diag = diag_offset[i] - bi[i];
517       rs   = 0.0;
518       for (j=0; j<nz; j++) {
519         pv[j] = rtmps[pj[j]];
520         if (j != diag) rs += PetscAbsScalar(pv[j]);
521       }
522 
523       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
524       sctx.rs  = rs;
525       sctx.pv  = pv[diag];
526       newshift = 0;
527       ierr = Mat_LUFactorCheckShift(info,&sctx,&newshift);CHKERRQ(ierr);
528       if (newshift == 1){
529         break;    /* sctx.shift_amount is updated */
530       } else if (newshift == -1){
531         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
532       }
533     }
534 
535     if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
536       /*
537        * if no shift in this attempt & shifting & started shifting & can refine,
538        * then try lower shift
539        */
540       sctx.shift_hi       = info->shift_fraction;
541       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
542       sctx.shift_amount   = info->shift_fraction * sctx.shift_top;
543       sctx.lushift        = PETSC_TRUE;
544       sctx.nshift++;
545     }
546   } while (sctx.lushift);
547 
548   /* invert diagonal entries for simplier triangular solves */
549   for (i=0; i<n; i++) {
550     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
551   }
552 
553   ierr = PetscFree(rtmp);CHKERRQ(ierr);
554   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
555   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
556   C->factor = FACTOR_LU;
557   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
558   C->assembled = PETSC_TRUE;
559   PetscLogFlops(C->n);
560   if (sctx.nshift){
561     if (shift_nonzero) {
562       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
563     } else if (shift_pd) {
564       b->lu_shift_fraction = info->shift_fraction;
565       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift);
566     }
567   }
568   PetscFunctionReturn(0);
569 }
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
573 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
574 {
575   PetscFunctionBegin;
576   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
577   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
578   PetscFunctionReturn(0);
579 }
580 
581 
582 /* ----------------------------------------------------------- */
583 #undef __FUNCT__
584 #define __FUNCT__ "MatLUFactor_SeqAIJ"
585 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
586 {
587   PetscErrorCode ierr;
588   Mat            C;
589 
590   PetscFunctionBegin;
591   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
592   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
593   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
594   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
595   PetscFunctionReturn(0);
596 }
597 /* ----------------------------------------------------------- */
598 #undef __FUNCT__
599 #define __FUNCT__ "MatSolve_SeqAIJ"
600 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
601 {
602   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
603   IS             iscol = a->col,isrow = a->row;
604   PetscErrorCode ierr;
605   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
606   PetscInt       nz,*rout,*cout;
607   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
608 
609   PetscFunctionBegin;
610   if (!n) PetscFunctionReturn(0);
611 
612   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
613   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
614   tmp  = a->solve_work;
615 
616   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
617   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
618 
619   /* forward solve the lower triangular */
620   tmp[0] = b[*r++];
621   tmps   = tmp;
622   for (i=1; i<n; i++) {
623     v   = aa + ai[i] ;
624     vi  = aj + ai[i] ;
625     nz  = a->diag[i] - ai[i];
626     sum = b[*r++];
627     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
628     tmp[i] = sum;
629   }
630 
631   /* backward solve the upper triangular */
632   for (i=n-1; i>=0; i--){
633     v   = aa + a->diag[i] + 1;
634     vi  = aj + a->diag[i] + 1;
635     nz  = ai[i+1] - a->diag[i] - 1;
636     sum = tmp[i];
637     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
638     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
639   }
640 
641   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
642   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
643   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
644   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
645   PetscLogFlops(2*a->nz - A->n);
646   PetscFunctionReturn(0);
647 }
648 
649 /* ----------------------------------------------------------- */
650 #undef __FUNCT__
651 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
652 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
653 {
654   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
655   PetscErrorCode ierr;
656   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
657   PetscScalar    *x,*b,*aa = a->a;
658 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
659   PetscInt       adiag_i,i,*vi,nz,ai_i;
660   PetscScalar    *v,sum;
661 #endif
662 
663   PetscFunctionBegin;
664   if (!n) PetscFunctionReturn(0);
665 
666   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
667   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
668 
669 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
670   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
671 #else
672   /* forward solve the lower triangular */
673   x[0] = b[0];
674   for (i=1; i<n; i++) {
675     ai_i = ai[i];
676     v    = aa + ai_i;
677     vi   = aj + ai_i;
678     nz   = adiag[i] - ai_i;
679     sum  = b[i];
680     while (nz--) sum -= *v++ * x[*vi++];
681     x[i] = sum;
682   }
683 
684   /* backward solve the upper triangular */
685   for (i=n-1; i>=0; i--){
686     adiag_i = adiag[i];
687     v       = aa + adiag_i + 1;
688     vi      = aj + adiag_i + 1;
689     nz      = ai[i+1] - adiag_i - 1;
690     sum     = x[i];
691     while (nz--) sum -= *v++ * x[*vi++];
692     x[i]    = sum*aa[adiag_i];
693   }
694 #endif
695   PetscLogFlops(2*a->nz - A->n);
696   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
697   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
703 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
704 {
705   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
706   IS             iscol = a->col,isrow = a->row;
707   PetscErrorCode ierr;
708   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
709   PetscInt       nz,*rout,*cout;
710   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
711 
712   PetscFunctionBegin;
713   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
714 
715   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
716   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
717   tmp  = a->solve_work;
718 
719   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
720   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
721 
722   /* forward solve the lower triangular */
723   tmp[0] = b[*r++];
724   for (i=1; i<n; i++) {
725     v   = aa + ai[i] ;
726     vi  = aj + ai[i] ;
727     nz  = a->diag[i] - ai[i];
728     sum = b[*r++];
729     while (nz--) sum -= *v++ * tmp[*vi++ ];
730     tmp[i] = sum;
731   }
732 
733   /* backward solve the upper triangular */
734   for (i=n-1; i>=0; i--){
735     v   = aa + a->diag[i] + 1;
736     vi  = aj + a->diag[i] + 1;
737     nz  = ai[i+1] - a->diag[i] - 1;
738     sum = tmp[i];
739     while (nz--) sum -= *v++ * tmp[*vi++ ];
740     tmp[i] = sum*aa[a->diag[i]];
741     x[*c--] += tmp[i];
742   }
743 
744   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
745   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
746   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
747   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
748   PetscLogFlops(2*a->nz);
749 
750   PetscFunctionReturn(0);
751 }
752 /* -------------------------------------------------------------------*/
753 #undef __FUNCT__
754 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
755 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
756 {
757   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
758   IS             iscol = a->col,isrow = a->row;
759   PetscErrorCode ierr;
760   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
761   PetscInt       nz,*rout,*cout,*diag = a->diag;
762   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
763 
764   PetscFunctionBegin;
765   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
766   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
767   tmp  = a->solve_work;
768 
769   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
770   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
771 
772   /* copy the b into temp work space according to permutation */
773   for (i=0; i<n; i++) tmp[i] = b[c[i]];
774 
775   /* forward solve the U^T */
776   for (i=0; i<n; i++) {
777     v   = aa + diag[i] ;
778     vi  = aj + diag[i] + 1;
779     nz  = ai[i+1] - diag[i] - 1;
780     s1  = tmp[i];
781     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
782     while (nz--) {
783       tmp[*vi++ ] -= (*v++)*s1;
784     }
785     tmp[i] = s1;
786   }
787 
788   /* backward solve the L^T */
789   for (i=n-1; i>=0; i--){
790     v   = aa + diag[i] - 1 ;
791     vi  = aj + diag[i] - 1 ;
792     nz  = diag[i] - ai[i];
793     s1  = tmp[i];
794     while (nz--) {
795       tmp[*vi-- ] -= (*v--)*s1;
796     }
797   }
798 
799   /* copy tmp into x according to permutation */
800   for (i=0; i<n; i++) x[r[i]] = tmp[i];
801 
802   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
803   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
804   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
805   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
806 
807   PetscLogFlops(2*a->nz-A->n);
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
813 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
814 {
815   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
816   IS             iscol = a->col,isrow = a->row;
817   PetscErrorCode ierr;
818   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
819   PetscInt       nz,*rout,*cout,*diag = a->diag;
820   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
821 
822   PetscFunctionBegin;
823   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
824 
825   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
826   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
827   tmp = a->solve_work;
828 
829   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
830   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
831 
832   /* copy the b into temp work space according to permutation */
833   for (i=0; i<n; i++) tmp[i] = b[c[i]];
834 
835   /* forward solve the U^T */
836   for (i=0; i<n; i++) {
837     v   = aa + diag[i] ;
838     vi  = aj + diag[i] + 1;
839     nz  = ai[i+1] - diag[i] - 1;
840     tmp[i] *= *v++;
841     while (nz--) {
842       tmp[*vi++ ] -= (*v++)*tmp[i];
843     }
844   }
845 
846   /* backward solve the L^T */
847   for (i=n-1; i>=0; i--){
848     v   = aa + diag[i] - 1 ;
849     vi  = aj + diag[i] - 1 ;
850     nz  = diag[i] - ai[i];
851     while (nz--) {
852       tmp[*vi-- ] -= (*v--)*tmp[i];
853     }
854   }
855 
856   /* copy tmp into x according to permutation */
857   for (i=0; i<n; i++) x[r[i]] += tmp[i];
858 
859   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
860   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
861   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
862   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
863 
864   PetscLogFlops(2*a->nz);
865   PetscFunctionReturn(0);
866 }
867 /* ----------------------------------------------------------------*/
868 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
869 
870 #undef __FUNCT__
871 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
872 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
873 {
874   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
875   IS             isicol;
876   PetscErrorCode ierr;
877   PetscInt       *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
878   PetscInt       *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
879   PetscInt       *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
880   PetscInt       incrlev,nnz,i,levels,diagonal_fill;
881   PetscTruth     col_identity,row_identity;
882   PetscReal      f;
883 
884   PetscFunctionBegin;
885   f             = info->fill;
886   levels        = (PetscInt)info->levels;
887   diagonal_fill = (PetscInt)info->diagonal_fill;
888   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
889 
890   /* special case that simply copies fill pattern */
891   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
892   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
893   if (!levels && row_identity && col_identity) {
894     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
895     (*fact)->factor = FACTOR_LU;
896     b               = (Mat_SeqAIJ*)(*fact)->data;
897     if (!b->diag) {
898       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
899     }
900     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
901     b->row              = isrow;
902     b->col              = iscol;
903     b->icol             = isicol;
904     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
905     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
906     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
907     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
908     PetscFunctionReturn(0);
909   }
910 
911   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
912   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
913 
914   /* get new row pointers */
915   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
916   ainew[0] = 0;
917   /* don't know how many column pointers are needed so estimate */
918   jmax = (PetscInt)(f*(ai[n]+1));
919   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
920   /* ajfill is level of fill for each fill entry */
921   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr);
922   /* fill is a linked list of nonzeros in active row */
923   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
924   /* im is level for each filled value */
925   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
926   /* dloc is location of diagonal in factor */
927   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr);
928   dloc[0]  = 0;
929   for (prow=0; prow<n; prow++) {
930 
931     /* copy current row into linked list */
932     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
933     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
934     xi      = aj + ai[r[prow]];
935     fill[n] = n;
936     fill[prow] = -1; /* marker to indicate if diagonal exists */
937     while (nz--) {
938       fm  = n;
939       idx = ic[*xi++ ];
940       do {
941         m  = fm;
942         fm = fill[m];
943       } while (fm < idx);
944       fill[m]   = idx;
945       fill[idx] = fm;
946       im[idx]   = 0;
947     }
948 
949     /* make sure diagonal entry is included */
950     if (diagonal_fill && fill[prow] == -1) {
951       fm = n;
952       while (fill[fm] < prow) fm = fill[fm];
953       fill[prow] = fill[fm]; /* insert diagonal into linked list */
954       fill[fm]   = prow;
955       im[prow]   = 0;
956       nzf++;
957       dcount++;
958     }
959 
960     nzi = 0;
961     row = fill[n];
962     while (row < prow) {
963       incrlev = im[row] + 1;
964       nz      = dloc[row];
965       xi      = ajnew  + ainew[row]  + nz + 1;
966       flev    = ajfill + ainew[row]  + nz + 1;
967       nnz     = ainew[row+1] - ainew[row] - nz - 1;
968       fm      = row;
969       while (nnz-- > 0) {
970         idx = *xi++ ;
971         if (*flev + incrlev > levels) {
972           flev++;
973           continue;
974         }
975         do {
976           m  = fm;
977           fm = fill[m];
978         } while (fm < idx);
979         if (fm != idx) {
980           im[idx]   = *flev + incrlev;
981           fill[m]   = idx;
982           fill[idx] = fm;
983           fm        = idx;
984           nzf++;
985         } else {
986           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
987         }
988         flev++;
989       }
990       row = fill[row];
991       nzi++;
992     }
993     /* copy new filled row into permanent storage */
994     ainew[prow+1] = ainew[prow] + nzf;
995     if (ainew[prow+1] > jmax) {
996 
997       /* estimate how much additional space we will need */
998       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
999       /* just double the memory each time */
1000       /*  maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1001       PetscInt maxadd = jmax;
1002       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1003       jmax += maxadd;
1004 
1005       /* allocate a longer ajnew and ajfill */
1006       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1007       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1008       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1009       ajnew  = xi;
1010       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1011       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1012       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1013       ajfill = xi;
1014       reallocs++; /* count how many times we realloc */
1015     }
1016     xi          = ajnew + ainew[prow] ;
1017     flev        = ajfill + ainew[prow] ;
1018     dloc[prow]  = nzi;
1019     fm          = fill[n];
1020     while (nzf--) {
1021       *xi++   = fm ;
1022       *flev++ = im[fm];
1023       fm      = fill[fm];
1024     }
1025     /* make sure row has diagonal entry */
1026     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1027       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1028     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1029     }
1030   }
1031   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1032   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1033   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1034   ierr = PetscFree(fill);CHKERRQ(ierr);
1035   ierr = PetscFree(im);CHKERRQ(ierr);
1036 
1037   {
1038     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1039     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1040     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1041     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1042     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1043     if (diagonal_fill) {
1044       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1045     }
1046   }
1047 
1048   /* put together the new matrix */
1049   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1050   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1051   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1052   PetscLogObjectParent(*fact,isicol);
1053   b = (Mat_SeqAIJ*)(*fact)->data;
1054   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1055   b->singlemalloc = PETSC_FALSE;
1056   len = (ainew[n] )*sizeof(PetscScalar);
1057   /* the next line frees the default space generated by the Create() */
1058   ierr = PetscFree(b->a);CHKERRQ(ierr);
1059   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1060   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1061   b->j          = ajnew;
1062   b->i          = ainew;
1063   for (i=0; i<n; i++) dloc[i] += ainew[i];
1064   b->diag       = dloc;
1065   b->ilen       = 0;
1066   b->imax       = 0;
1067   b->row        = isrow;
1068   b->col        = iscol;
1069   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1070   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1071   b->icol       = isicol;
1072   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1073   /* In b structure:  Free imax, ilen, old a, old j.
1074      Allocate dloc, solve_work, new a, new j */
1075   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1076   b->maxnz             = b->nz = ainew[n] ;
1077   (*fact)->factor = FACTOR_LU;
1078   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1079   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1080   (*fact)->info.factor_mallocs    = reallocs;
1081   (*fact)->info.fill_ratio_given  = f;
1082   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 #include "src/mat/impls/sbaij/seq/sbaij.h"
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1089 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1090 {
1091   Mat            C = *B;
1092   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1093   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1094   IS             ip=b->row;
1095   PetscErrorCode ierr;
1096   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1097   PetscInt       *ai=a->i,*aj=a->j;
1098   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1099   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1100   PetscReal      zeropivot,shift_amount,rs,shift_nonzero;
1101   PetscTruth     chshift,shift_pd;
1102   PetscInt       nshift=0;
1103 
1104   PetscFunctionBegin;
1105   shift_nonzero  = info->shiftnz;
1106   shift_pd       = info->shiftpd;
1107   zeropivot      = info->zeropivot;
1108 
1109   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1110 
1111   /* initialization */
1112   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1113   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1114   jl   = il + mbs;
1115   rtmp = (MatScalar*)(jl + mbs);
1116 
1117   shift_amount = 0;
1118   do {
1119     chshift = PETSC_FALSE;
1120     for (i=0; i<mbs; i++) {
1121       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1122     }
1123 
1124     for (k = 0; k<mbs; k++){
1125       bval = ba + bi[k];
1126       /* initialize k-th row by the perm[k]-th row of A */
1127       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1128       for (j = jmin; j < jmax; j++){
1129         col = rip[aj[j]];
1130         if (col >= k){ /* only take upper triangular entry */
1131           rtmp[col] = aa[j];
1132           *bval++  = 0.0; /* for in-place factorization */
1133         }
1134       }
1135       /* shift the diagonal of the matrix */
1136       if (nshift) rtmp[k] += shift_amount;
1137 
1138       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1139       dk = rtmp[k];
1140       i = jl[k]; /* first row to be added to k_th row  */
1141 
1142       while (i < k){
1143         nexti = jl[i]; /* next row to be added to k_th row */
1144 
1145         /* compute multiplier, update diag(k) and U(i,k) */
1146         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1147         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1148         dk += uikdi*ba[ili];
1149         ba[ili] = uikdi; /* -U(i,k) */
1150 
1151         /* add multiple of row i to k-th row */
1152         jmin = ili + 1; jmax = bi[i+1];
1153         if (jmin < jmax){
1154           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1155           /* update il and jl for row i */
1156           il[i] = jmin;
1157           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1158         }
1159         i = nexti;
1160       }
1161 
1162       /* shift the diagonals when zero pivot is detected */
1163       /* compute rs=sum of abs(off-diagonal) */
1164       rs   = 0.0;
1165       jmin = bi[k]+1;
1166       nz   = bi[k+1] - jmin;
1167       if (nz){
1168         bcol = bj + jmin;
1169         while (nz--){
1170           rs += PetscAbsScalar(rtmp[*bcol++]);
1171         }
1172       }
1173       if (PetscAbsScalar(dk) <= zeropivot*rs && shift_nonzero){
1174         /* force |diag(*B)| > zeropivot*rs */
1175         if (!nshift){
1176           shift_amount = shift_nonzero;
1177         } else {
1178           shift_amount *= 2.0;
1179         }
1180         chshift = PETSC_TRUE;
1181         nshift++;
1182         break;
1183       } else if (PetscRealPart(dk) <= zeropivot*rs && shift_pd){
1184         /* calculate a shift that would make this row diagonally dominant */
1185 	shift_amount = PetscMax(rs+PetscAbs(PetscRealPart(dk)),1.1*shift_amount);
1186 	chshift      = PETSC_TRUE;
1187 	/* Unlike in the ILU case there is no exit condition on nshift:
1188 	   we increase the shift until it converges. There is no guarantee that
1189 	   this algorithm converges faster or slower, or is better or worse
1190 	   than the ILU algorithm. */
1191 	nshift++;
1192 	break;
1193       } else if (PetscAbsScalar(dk) <= zeropivot*rs){
1194         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1195       }
1196 
1197       /* copy data into U(k,:) */
1198       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1199       jmin = bi[k]+1; jmax = bi[k+1];
1200       if (jmin < jmax) {
1201         for (j=jmin; j<jmax; j++){
1202           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1203         }
1204         /* add the k-th row into il and jl */
1205         il[k] = jmin;
1206         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1207       }
1208     }
1209   } while (chshift);
1210   ierr = PetscFree(il);CHKERRQ(ierr);
1211 
1212   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1213   C->factor       = FACTOR_CHOLESKY;
1214   C->assembled    = PETSC_TRUE;
1215   C->preallocated = PETSC_TRUE;
1216   PetscLogFlops(C->m);
1217   if (nshift){
1218     if (shift_nonzero) {
1219       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount);
1220     } else if (shift_pd) {
1221       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g\n",nshift,shift_amount);
1222     }
1223   }
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #include "petscbt.h"
1228 #include "src/mat/utils/freespace.h"
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1231 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1232 {
1233   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1234   Mat_SeqSBAIJ   *b;
1235   Mat            B;
1236   PetscErrorCode ierr;
1237   PetscTruth     perm_identity;
1238   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1239   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1240   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1241   PetscInt       ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1242   PetscReal      fill=info->fill,levels=info->levels;
1243   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1244   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1245   PetscBT        lnkbt;
1246 
1247   PetscFunctionBegin;
1248   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1249   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1250 
1251   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1252   ui[0] = 0;
1253 
1254   /* special case that simply copies fill pattern */
1255   if (!levels && perm_identity) {
1256     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1257     for (i=0; i<am; i++) {
1258       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1259     }
1260     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1261     cols = uj;
1262     for (i=0; i<am; i++) {
1263       aj    = a->j + a->diag[i];
1264       ncols = ui[i+1] - ui[i];
1265       for (j=0; j<ncols; j++) *cols++ = *aj++;
1266     }
1267   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1268     /* initialization */
1269     ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
1270 
1271     /* jl: linked list for storing indices of the pivot rows
1272        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1273     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1274     il         = jl + am;
1275     uj_ptr     = (PetscInt**)(il + am);
1276     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1277     for (i=0; i<am; i++){
1278       jl[i] = am; il[i] = 0;
1279     }
1280 
1281     /* create and initialize a linked list for storing column indices of the active row k */
1282     nlnk = am + 1;
1283     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1284 
1285     /* initial FreeSpace size is fill*(ai[am]+1) */
1286     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1287     current_space = free_space;
1288     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1289     current_space_lvl = free_space_lvl;
1290 
1291     for (k=0; k<am; k++){  /* for each active row k */
1292       /* initialize lnk by the column indices of row rip[k] of A */
1293       nzk   = 0;
1294       ncols = ai[rip[k]+1] - ai[rip[k]];
1295       ncols_upper = 0;
1296       cols        = cols_lvl + am;
1297       for (j=0; j<ncols; j++){
1298         i = rip[*(aj + ai[rip[k]] + j)];
1299         if (i >= k){ /* only take upper triangular entry */
1300           cols[ncols_upper] = i;
1301           cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
1302           ncols_upper++;
1303         }
1304       }
1305       ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1306       nzk += nlnk;
1307 
1308       /* update lnk by computing fill-in for each pivot row to be merged in */
1309       prow = jl[k]; /* 1st pivot row */
1310 
1311       while (prow < k){
1312         nextprow = jl[prow];
1313 
1314         /* merge prow into k-th row */
1315         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1316         jmax = ui[prow+1];
1317         ncols = jmax-jmin;
1318         i     = jmin - ui[prow];
1319         cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1320         for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1321         ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1322         nzk += nlnk;
1323 
1324         /* update il and jl for prow */
1325         if (jmin < jmax){
1326           il[prow] = jmin;
1327           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1328         }
1329         prow = nextprow;
1330       }
1331 
1332       /* if free space is not available, make more free space */
1333       if (current_space->local_remaining<nzk) {
1334         i = am - k + 1; /* num of unfactored rows */
1335         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1336         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1337         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1338         reallocs++;
1339       }
1340 
1341       /* copy data into free_space and free_space_lvl, then initialize lnk */
1342       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1343 
1344       /* add the k-th row into il and jl */
1345       if (nzk > 1){
1346         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1347         jl[k] = jl[i]; jl[i] = k;
1348         il[k] = ui[k] + 1;
1349       }
1350       uj_ptr[k]     = current_space->array;
1351       uj_lvl_ptr[k] = current_space_lvl->array;
1352 
1353       current_space->array           += nzk;
1354       current_space->local_used      += nzk;
1355       current_space->local_remaining -= nzk;
1356 
1357       current_space_lvl->array           += nzk;
1358       current_space_lvl->local_used      += nzk;
1359       current_space_lvl->local_remaining -= nzk;
1360 
1361       ui[k+1] = ui[k] + nzk;
1362     }
1363 
1364     if (ai[am] != 0) {
1365       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1366       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1367       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1368       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1369     } else {
1370       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1371     }
1372 
1373     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1374     ierr = PetscFree(jl);CHKERRQ(ierr);
1375     ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
1376 
1377     /* destroy list of free space and other temporary array(s) */
1378     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1379     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1380     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1381     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1382 
1383   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1384 
1385   /* put together the new matrix in MATSEQSBAIJ format */
1386   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1387   B = *fact;
1388   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1389   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1390 
1391   b    = (Mat_SeqSBAIJ*)B->data;
1392   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1393   b->singlemalloc = PETSC_FALSE;
1394   /* the next line frees the default space generated by the Create() */
1395   ierr = PetscFree(b->a);CHKERRQ(ierr);
1396   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1397   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1398   b->j    = uj;
1399   b->i    = ui;
1400   b->diag = 0;
1401   b->ilen = 0;
1402   b->imax = 0;
1403   b->row  = perm;
1404   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1405   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1406   b->icol = perm;
1407   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1408   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1409   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1410   b->maxnz = b->nz = ui[am];
1411 
1412   B->factor                 = FACTOR_CHOLESKY;
1413   B->info.factor_mallocs    = reallocs;
1414   B->info.fill_ratio_given  = fill;
1415   if (ai[am] != 0) {
1416     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1417   } else {
1418     B->info.fill_ratio_needed = 0.0;
1419   }
1420   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1421   if (perm_identity){
1422     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1423     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1430 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1431 {
1432   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1433   Mat_SeqSBAIJ   *b;
1434   Mat            B;
1435   PetscErrorCode ierr;
1436   PetscTruth     perm_identity;
1437   PetscReal      fill = info->fill;
1438   PetscInt       *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1439   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1440   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1441   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1442   PetscBT        lnkbt;
1443   IS             iperm;
1444 
1445   PetscFunctionBegin;
1446   /* check whether perm is the identity mapping */
1447   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1448   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1449 
1450   if (!perm_identity){
1451     /* check if perm is symmetric! */
1452     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1453     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1454     for (i=0; i<mbs; i++) {
1455       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1456     }
1457     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1458     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1459   }
1460 
1461   /* initialization */
1462   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1463   ui[0] = 0;
1464 
1465   /* jl: linked list for storing indices of the pivot rows
1466      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
1467   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1468   il     = jl + mbs;
1469   cols   = il + mbs;
1470   ui_ptr = (PetscInt**)(cols + mbs);
1471   for (i=0; i<mbs; i++){
1472     jl[i] = mbs; il[i] = 0;
1473   }
1474 
1475   /* create and initialize a linked list for storing column indices of the active row k */
1476   nlnk = mbs + 1;
1477   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1478 
1479   /* initial FreeSpace size is fill*(ai[mbs]+1) */
1480   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
1481   current_space = free_space;
1482 
1483   for (k=0; k<mbs; k++){  /* for each active row k */
1484     /* initialize lnk by the column indices of row rip[k] of A */
1485     nzk   = 0;
1486     ncols = ai[rip[k]+1] - ai[rip[k]];
1487     ncols_upper = 0;
1488     for (j=0; j<ncols; j++){
1489       i = rip[*(aj + ai[rip[k]] + j)];
1490       if (i >= k){ /* only take upper triangular entry */
1491         cols[ncols_upper] = i;
1492         ncols_upper++;
1493       }
1494     }
1495     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1496     nzk += nlnk;
1497 
1498     /* update lnk by computing fill-in for each pivot row to be merged in */
1499     prow = jl[k]; /* 1st pivot row */
1500 
1501     while (prow < k){
1502       nextprow = jl[prow];
1503       /* merge prow into k-th row */
1504       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
1505       jmax = ui[prow+1];
1506       ncols = jmax-jmin;
1507       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
1508       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1509       nzk += nlnk;
1510 
1511       /* update il and jl for prow */
1512       if (jmin < jmax){
1513         il[prow] = jmin;
1514         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1515       }
1516       prow = nextprow;
1517     }
1518 
1519     /* if free space is not available, make more free space */
1520     if (current_space->local_remaining<nzk) {
1521       i = mbs - k + 1; /* num of unfactored rows */
1522       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1523       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1524       reallocs++;
1525     }
1526 
1527     /* copy data into free space, then initialize lnk */
1528     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1529 
1530     /* add the k-th row into il and jl */
1531     if (nzk-1 > 0){
1532       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1533       jl[k] = jl[i]; jl[i] = k;
1534       il[k] = ui[k] + 1;
1535     }
1536     ui_ptr[k] = current_space->array;
1537     current_space->array           += nzk;
1538     current_space->local_used      += nzk;
1539     current_space->local_remaining -= nzk;
1540 
1541     ui[k+1] = ui[k] + nzk;
1542   }
1543 
1544   if (ai[mbs] != 0) {
1545     PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]);
1546     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1547     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1548     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1549   } else {
1550      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1551   }
1552 
1553   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1554   ierr = PetscFree(jl);CHKERRQ(ierr);
1555 
1556   /* destroy list of free space and other temporary array(s) */
1557   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1558   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1559   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1560 
1561   /* put together the new matrix in MATSEQSBAIJ format */
1562   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
1563   B    = *fact;
1564   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1565   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1566 
1567   b = (Mat_SeqSBAIJ*)B->data;
1568   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1569   b->singlemalloc = PETSC_FALSE;
1570   /* the next line frees the default space generated by the Create() */
1571   ierr = PetscFree(b->a);CHKERRQ(ierr);
1572   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1573   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1574   b->j    = uj;
1575   b->i    = ui;
1576   b->diag = 0;
1577   b->ilen = 0;
1578   b->imax = 0;
1579   b->row  = perm;
1580   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1581   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1582   b->icol = perm;
1583   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1584   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1585   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
1586   b->maxnz = b->nz = ui[mbs];
1587 
1588   B->factor                 = FACTOR_CHOLESKY;
1589   B->info.factor_mallocs    = reallocs;
1590   B->info.fill_ratio_given  = fill;
1591   if (ai[mbs] != 0) {
1592     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
1593   } else {
1594     B->info.fill_ratio_needed = 0.0;
1595   }
1596   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1597   if (perm_identity){
1598     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1599     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1600   }
1601   PetscFunctionReturn(0);
1602 }
1603