xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 1db153dce078f0d15aa3ce57bbaa5aa676e742c8)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9 {
10   PetscFunctionBegin;
11 
12   SETERRQ(PETSC_ERR_SUP,"Code not written");
13 #if !defined(PETSC_USE_DEBUG)
14   PetscFunctionReturn(0);
15 #endif
16 }
17 
18 
19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
21 
22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
28   /* ------------------------------------------------------------
29 
30           This interface was contribed by Tony Caola
31 
32      This routine is an interface to the pivoting drop-tolerance
33      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34      SPARSEKIT2.
35 
36      The SPARSEKIT2 routines used here are covered by the GNU
37      copyright; see the file gnu in this directory.
38 
39      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40      help in getting this routine ironed out.
41 
42      The major drawback to this routine is that if info->fill is
43      not large enough it fails rather than allocating more space;
44      this can be fixed by hacking/improving the f2c version of
45      Yousef Saad's code.
46 
47      ------------------------------------------------------------
48 */
49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50 {
51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
52   PetscFunctionBegin;
53   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
54   You can obtain the drop tolerance routines by installing PETSc from\n\
55   www.mcs.anl.gov/petsc\n");
56 #else
57   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
58   IS             iscolf,isicol,isirow;
59   PetscTruth     reorder;
60   PetscErrorCode ierr,sierr;
61   PetscInt       *c,*r,*ic,i,n = A->m;
62   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63   PetscInt       *ordcol,*iwk,*iperm,*jw;
64   PetscInt       jmax,lfill,job,*o_i,*o_j;
65   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66   PetscReal      af;
67 
68   PetscFunctionBegin;
69 
70   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
71   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
72   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
73   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
74   lfill   = (PetscInt)(info->dtcount/2.0);
75   jmax    = (PetscInt)(info->fill*a->nz);
76 
77 
78   /* ------------------------------------------------------------
79      If reorder=.TRUE., then the original matrix has to be
80      reordered to reflect the user selected ordering scheme, and
81      then de-reordered so it is in it's original format.
82      Because Saad's dperm() is NOT in place, we have to copy
83      the original matrix and allocate more storage. . .
84      ------------------------------------------------------------
85   */
86 
87   /* set reorder to true if either isrow or iscol is not identity */
88   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
89   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
90   reorder = PetscNot(reorder);
91 
92 
93   /* storage for ilu factor */
94   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
95   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
97   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
98 
99   /* ------------------------------------------------------------
100      Make sure that everything is Fortran formatted (1-Based)
101      ------------------------------------------------------------
102   */
103   for (i=old_i[0];i<old_i[n];i++) {
104     old_j[i]++;
105   }
106   for(i=0;i<n+1;i++) {
107     old_i[i]++;
108   };
109 
110 
111   if (reorder) {
112     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
113     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
114     for(i=0;i<n;i++) {
115       r[i]  = r[i]+1;
116       c[i]  = c[i]+1;
117     }
118     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
119     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
121     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
122     for (i=0;i<n;i++) {
123       r[i]  = r[i]-1;
124       c[i]  = c[i]-1;
125     }
126     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
127     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
128     o_a = old_a2;
129     o_j = old_j2;
130     o_i = old_i2;
131   } else {
132     o_a = old_a;
133     o_j = old_j;
134     o_i = old_i;
135   }
136 
137   /* ------------------------------------------------------------
138      Call Saad's ilutp() routine to generate the factorization
139      ------------------------------------------------------------
140   */
141 
142   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
144   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
145 
146   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
147   if (sierr) {
148     switch (sierr) {
149       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
150       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
152       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
153       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
154       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
155     }
156   }
157 
158   ierr = PetscFree(w);CHKERRQ(ierr);
159   ierr = PetscFree(jw);CHKERRQ(ierr);
160 
161   /* ------------------------------------------------------------
162      Saad's routine gives the result in Modified Sparse Row (msr)
163      Convert to Compressed Sparse Row format (csr)
164      ------------------------------------------------------------
165   */
166 
167   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
168   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
169 
170   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
171 
172   ierr = PetscFree(iwk);CHKERRQ(ierr);
173   ierr = PetscFree(wk);CHKERRQ(ierr);
174 
175   if (reorder) {
176     ierr = PetscFree(old_a2);CHKERRQ(ierr);
177     ierr = PetscFree(old_j2);CHKERRQ(ierr);
178     ierr = PetscFree(old_i2);CHKERRQ(ierr);
179   } else {
180     /* fix permutation of old_j that the factorization introduced */
181     for (i=old_i[0]; i<old_i[n]; i++) {
182       old_j[i-1] = iperm[old_j[i-1]-1];
183     }
184   }
185 
186   /* get rid of the shift to indices starting at 1 */
187   for (i=0; i<n+1; i++) {
188     old_i[i]--;
189   }
190   for (i=old_i[0];i<old_i[n];i++) {
191     old_j[i]--;
192   }
193 
194   /* Make the factored matrix 0-based */
195   for (i=0; i<n+1; i++) {
196     new_i[i]--;
197   }
198   for (i=new_i[0];i<new_i[n];i++) {
199     new_j[i]--;
200   }
201 
202   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203   /*-- permute the right-hand-side and solution vectors           --*/
204   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
205   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
206   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
207   for(i=0; i<n; i++) {
208     ordcol[i] = ic[iperm[i]-1];
209   };
210   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
211   ierr = ISDestroy(isicol);CHKERRQ(ierr);
212 
213   ierr = PetscFree(iperm);CHKERRQ(ierr);
214 
215   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
216   ierr = PetscFree(ordcol);CHKERRQ(ierr);
217 
218   /*----- put together the new matrix -----*/
219 
220   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
221   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
222   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
223   (*fact)->factor    = FACTOR_LU;
224   (*fact)->assembled = PETSC_TRUE;
225 
226   b = (Mat_SeqAIJ*)(*fact)->data;
227   ierr = PetscFree(b->imax);CHKERRQ(ierr);
228   b->sorted        = PETSC_FALSE;
229   b->singlemalloc  = PETSC_FALSE;
230   /* the next line frees the default space generated by the MatCreate() */
231   ierr             = PetscFree(b->a);CHKERRQ(ierr);
232   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
247 
248   /* check out for identical nodes. If found, use inode functions */
249   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
250 
251   af = ((double)b->nz)/((double)a->nz) + .001;
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 /*
262     Factorization code for AIJ format.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267 {
268   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
269   IS             isicol;
270   PetscErrorCode ierr;
271   PetscInt       *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
272   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
274   PetscReal      f;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
284   ainew[0] = 0;
285   /* don't know how many column pointers are needed so estimate */
286   f = info->fill;
287   jmax  = (PetscInt)(f*ai[n]+1);
288   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
289   /* fill is a linked list of nonzeros in active row */
290   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
291   im   = fill + n + 1;
292   /* idnew is location of diagonal in factor */
293   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
294   idnew[0] = 0;
295 
296   for (i=0; i<n; i++) {
297     /* first copy previous fill into linked list */
298     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
299     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300     ajtmp   = aj + ai[r[i]];
301     fill[n] = n;
302     while (nz--) {
303       fm  = n;
304       idx = ic[*ajtmp++];
305       do {
306         m  = fm;
307         fm = fill[m];
308       } while (fm < idx);
309       fill[m]   = idx;
310       fill[idx] = fm;
311     }
312     row = fill[n];
313     while (row < i) {
314       ajtmp = ajnew + idnew[row] + 1;
315       nzbd  = 1 + idnew[row] - ainew[row];
316       nz    = im[row] - nzbd;
317       fm    = row;
318       while (nz-- > 0) {
319         idx = *ajtmp++ ;
320         nzbd++;
321         if (idx == i) im[row] = nzbd;
322         do {
323           m  = fm;
324           fm = fill[m];
325         } while (fm < idx);
326         if (fm != idx) {
327           fill[m]   = idx;
328           fill[idx] = fm;
329           fm        = idx;
330           nnz++;
331         }
332       }
333       row = fill[row];
334     }
335     /* copy new filled row into permanent storage */
336     ainew[i+1] = ainew[i] + nnz;
337     if (ainew[i+1] > jmax) {
338       /* estimate how much additional space we will need */
339       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
340       /* just double the memory each time */
341       PetscInt maxadd = jmax;
342       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
343       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
344       jmax += maxadd;
345 
346       /* allocate a longer ajnew */
347       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
348       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr);
349       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
350       ajnew = ajtmp;
351       reallocs++; /* count how many times we realloc */
352     }
353     ajtmp = ajnew + ainew[i];
354     fm    = fill[n];
355     nzi   = 0;
356     im[i] = nnz;
357     while (nnz--) {
358       if (fm < i) nzi++;
359       *ajtmp++ = fm ;
360       fm       = fill[fm];
361     }
362     idnew[i] = ainew[i] + nzi;
363   }
364   if (ai[n] != 0) {
365     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
366     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
367     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
368     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
369     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
370   } else {
371     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
372   }
373 
374   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
375   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
376 
377   ierr = PetscFree(fill);CHKERRQ(ierr);
378 
379   /* put together the new matrix */
380   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
381   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
382   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
383   PetscLogObjectParent(*B,isicol);
384   b    = (Mat_SeqAIJ*)(*B)->data;
385   ierr = PetscFree(b->imax);CHKERRQ(ierr);
386   b->singlemalloc = PETSC_FALSE;
387   /* the next line frees the default space generated by the Create() */
388   ierr = PetscFree(b->a);CHKERRQ(ierr);
389   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
390   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
391   b->j          = ajnew;
392   b->i          = ainew;
393   b->diag       = idnew;
394   b->ilen       = 0;
395   b->imax       = 0;
396   b->row        = isrow;
397   b->col        = iscol;
398   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
399   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
400   b->icol       = isicol;
401   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
402   /* In b structure:  Free imax, ilen, old a, old j.
403      Allocate idnew, solve_work, new a, new j */
404   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
405   b->maxnz = b->nz = ainew[n] ;
406 
407   (*B)->factor                 =  FACTOR_LU;
408   (*B)->info.factor_mallocs    = reallocs;
409   (*B)->info.fill_ratio_given  = f;
410   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
411   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
412 
413   if (ai[n] != 0) {
414     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
415   } else {
416     (*B)->info.fill_ratio_needed = 0.0;
417   }
418   PetscFunctionReturn(0);
419 }
420 /* ----------------------------------------------------------- */
421 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
425 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
426 {
427   Mat            C=*B;
428   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
429   IS             isrow = b->row,isicol = b->icol;
430   PetscErrorCode ierr;
431   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
432   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
433   PetscInt       *diag_offset = b->diag,diag,*pj;
434   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
435   PetscReal      zeropivot,rs,d,shift_nz;
436   PetscReal      row_shift,shift_top=0.;
437   PetscTruth     shift_pd;
438   LUShift_Ctx    sctx;
439   PetscInt       newshift;
440 
441   PetscFunctionBegin;
442   shift_nz  = info->shiftnz;
443   shift_pd  = info->shiftpd;
444   zeropivot = info->zeropivot;
445 
446   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
447   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
448   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
449   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
450   rtmps = rtmp; ics = ic;
451 
452   if (!a->diag) {
453     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
454   }
455   /* if both shift schemes are chosen by user, only use shift_pd */
456   if (shift_pd && shift_nz) shift_nz = 0.0;
457   if (shift_pd) { /* set shift_top=max{row_shift} */
458     PetscInt *aai = a->i,*ddiag = a->diag;
459     shift_top = 0;
460     for (i=0; i<n; i++) {
461       d = PetscAbsScalar((a->a)[ddiag[i]]);
462       /* calculate amt of shift needed for this row */
463       if (d<=0) {
464         row_shift = 0;
465       } else {
466         row_shift = -2*d;
467       }
468       v  = a->a+aai[i];
469       nz = aai[i+1] - aai[i];
470       for (j=0; j<nz; j++)
471 	row_shift += PetscAbsScalar(v[j]);
472       if (row_shift>shift_top) shift_top = row_shift;
473     }
474   }
475 
476   sctx.shift_amount = 0;
477   sctx.shift_top    = shift_top;
478   sctx.nshift       = 0;
479   sctx.nshift_max   = 5;
480   sctx.shift_lo     = 0.;
481   sctx.shift_hi     = 1.;
482   do {
483     sctx.lushift = PETSC_FALSE;
484     for (i=0; i<n; i++){
485       nz    = bi[i+1] - bi[i];
486       bjtmp = bj + bi[i];
487       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
488 
489       /* load in initial (unfactored row) */
490       nz    = a->i[r[i]+1] - a->i[r[i]];
491       ajtmp = a->j + a->i[r[i]];
492       v     = a->a + a->i[r[i]];
493       for (j=0; j<nz; j++) {
494         rtmp[ics[ajtmp[j]]] = v[j];
495       }
496       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
497 
498       row = *bjtmp++;
499       while  (row < i) {
500         pc = rtmp + row;
501         if (*pc != 0.0) {
502           pv         = b->a + diag_offset[row];
503           pj         = b->j + diag_offset[row] + 1;
504           multiplier = *pc / *pv++;
505           *pc        = multiplier;
506           nz         = bi[row+1] - diag_offset[row] - 1;
507           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
508           PetscLogFlops(2*nz);
509         }
510         row = *bjtmp++;
511       }
512       /* finished row so stick it into b->a */
513       pv   = b->a + bi[i] ;
514       pj   = b->j + bi[i] ;
515       nz   = bi[i+1] - bi[i];
516       diag = diag_offset[i] - bi[i];
517       rs   = 0.0;
518       for (j=0; j<nz; j++) {
519         pv[j] = rtmps[pj[j]];
520         if (j != diag) rs += PetscAbsScalar(pv[j]);
521       }
522 
523       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
524       sctx.rs  = rs;
525       sctx.pv  = pv[diag];
526       ierr = Mat_LUCheckShift(info,&sctx,&newshift);CHKERRQ(ierr);
527       if (newshift == 1){
528         break;    /* sctx.shift_amount is updated */
529       } else if (newshift == -1){
530         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
531       }
532     }
533 
534     if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
535       /*
536        * if no shift in this attempt & shifting & started shifting & can refine,
537        * then try lower shift
538        */
539       sctx.shift_hi       = info->shift_fraction;
540       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
541       sctx.shift_amount   = info->shift_fraction * sctx.shift_top;
542       sctx.lushift        = PETSC_TRUE;
543       sctx.nshift++;
544     }
545   } while (sctx.lushift);
546 
547   /* invert diagonal entries for simplier triangular solves */
548   for (i=0; i<n; i++) {
549     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
550   }
551 
552   ierr = PetscFree(rtmp);CHKERRQ(ierr);
553   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
554   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
555   C->factor = FACTOR_LU;
556   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
557   C->assembled = PETSC_TRUE;
558   PetscLogFlops(C->n);
559   if (sctx.nshift){
560     if (shift_nz) {
561       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
562     } else if (shift_pd) {
563       b->lu_shift_fraction = info->shift_fraction;
564       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift);
565     }
566   }
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNCT__
571 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
572 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
573 {
574   PetscFunctionBegin;
575   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
576   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
577   PetscFunctionReturn(0);
578 }
579 
580 
581 /* ----------------------------------------------------------- */
582 #undef __FUNCT__
583 #define __FUNCT__ "MatLUFactor_SeqAIJ"
584 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
585 {
586   PetscErrorCode ierr;
587   Mat            C;
588 
589   PetscFunctionBegin;
590   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
591   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
592   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
593   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
594   PetscFunctionReturn(0);
595 }
596 /* ----------------------------------------------------------- */
597 #undef __FUNCT__
598 #define __FUNCT__ "MatSolve_SeqAIJ"
599 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
600 {
601   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
602   IS             iscol = a->col,isrow = a->row;
603   PetscErrorCode ierr;
604   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
605   PetscInt       nz,*rout,*cout;
606   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
607 
608   PetscFunctionBegin;
609   if (!n) PetscFunctionReturn(0);
610 
611   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
612   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
613   tmp  = a->solve_work;
614 
615   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
616   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
617 
618   /* forward solve the lower triangular */
619   tmp[0] = b[*r++];
620   tmps   = tmp;
621   for (i=1; i<n; i++) {
622     v   = aa + ai[i] ;
623     vi  = aj + ai[i] ;
624     nz  = a->diag[i] - ai[i];
625     sum = b[*r++];
626     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
627     tmp[i] = sum;
628   }
629 
630   /* backward solve the upper triangular */
631   for (i=n-1; i>=0; i--){
632     v   = aa + a->diag[i] + 1;
633     vi  = aj + a->diag[i] + 1;
634     nz  = ai[i+1] - a->diag[i] - 1;
635     sum = tmp[i];
636     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
637     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
638   }
639 
640   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
641   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
642   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
643   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
644   PetscLogFlops(2*a->nz - A->n);
645   PetscFunctionReturn(0);
646 }
647 
648 /* ----------------------------------------------------------- */
649 #undef __FUNCT__
650 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
651 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
652 {
653   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
654   PetscErrorCode ierr;
655   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
656   PetscScalar    *x,*b,*aa = a->a;
657 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
658   PetscInt       adiag_i,i,*vi,nz,ai_i;
659   PetscScalar    *v,sum;
660 #endif
661 
662   PetscFunctionBegin;
663   if (!n) PetscFunctionReturn(0);
664 
665   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
666   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
667 
668 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
669   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
670 #else
671   /* forward solve the lower triangular */
672   x[0] = b[0];
673   for (i=1; i<n; i++) {
674     ai_i = ai[i];
675     v    = aa + ai_i;
676     vi   = aj + ai_i;
677     nz   = adiag[i] - ai_i;
678     sum  = b[i];
679     while (nz--) sum -= *v++ * x[*vi++];
680     x[i] = sum;
681   }
682 
683   /* backward solve the upper triangular */
684   for (i=n-1; i>=0; i--){
685     adiag_i = adiag[i];
686     v       = aa + adiag_i + 1;
687     vi      = aj + adiag_i + 1;
688     nz      = ai[i+1] - adiag_i - 1;
689     sum     = x[i];
690     while (nz--) sum -= *v++ * x[*vi++];
691     x[i]    = sum*aa[adiag_i];
692   }
693 #endif
694   PetscLogFlops(2*a->nz - A->n);
695   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
696   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
697   PetscFunctionReturn(0);
698 }
699 
700 #undef __FUNCT__
701 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
702 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
703 {
704   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
705   IS             iscol = a->col,isrow = a->row;
706   PetscErrorCode ierr;
707   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
708   PetscInt       nz,*rout,*cout;
709   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
710 
711   PetscFunctionBegin;
712   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
713 
714   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
715   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
716   tmp  = a->solve_work;
717 
718   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
719   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
720 
721   /* forward solve the lower triangular */
722   tmp[0] = b[*r++];
723   for (i=1; i<n; i++) {
724     v   = aa + ai[i] ;
725     vi  = aj + ai[i] ;
726     nz  = a->diag[i] - ai[i];
727     sum = b[*r++];
728     while (nz--) sum -= *v++ * tmp[*vi++ ];
729     tmp[i] = sum;
730   }
731 
732   /* backward solve the upper triangular */
733   for (i=n-1; i>=0; i--){
734     v   = aa + a->diag[i] + 1;
735     vi  = aj + a->diag[i] + 1;
736     nz  = ai[i+1] - a->diag[i] - 1;
737     sum = tmp[i];
738     while (nz--) sum -= *v++ * tmp[*vi++ ];
739     tmp[i] = sum*aa[a->diag[i]];
740     x[*c--] += tmp[i];
741   }
742 
743   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
744   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
745   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
746   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
747   PetscLogFlops(2*a->nz);
748 
749   PetscFunctionReturn(0);
750 }
751 /* -------------------------------------------------------------------*/
752 #undef __FUNCT__
753 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
754 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
755 {
756   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
757   IS             iscol = a->col,isrow = a->row;
758   PetscErrorCode ierr;
759   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
760   PetscInt       nz,*rout,*cout,*diag = a->diag;
761   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
762 
763   PetscFunctionBegin;
764   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
765   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
766   tmp  = a->solve_work;
767 
768   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
769   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
770 
771   /* copy the b into temp work space according to permutation */
772   for (i=0; i<n; i++) tmp[i] = b[c[i]];
773 
774   /* forward solve the U^T */
775   for (i=0; i<n; i++) {
776     v   = aa + diag[i] ;
777     vi  = aj + diag[i] + 1;
778     nz  = ai[i+1] - diag[i] - 1;
779     s1  = tmp[i];
780     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
781     while (nz--) {
782       tmp[*vi++ ] -= (*v++)*s1;
783     }
784     tmp[i] = s1;
785   }
786 
787   /* backward solve the L^T */
788   for (i=n-1; i>=0; i--){
789     v   = aa + diag[i] - 1 ;
790     vi  = aj + diag[i] - 1 ;
791     nz  = diag[i] - ai[i];
792     s1  = tmp[i];
793     while (nz--) {
794       tmp[*vi-- ] -= (*v--)*s1;
795     }
796   }
797 
798   /* copy tmp into x according to permutation */
799   for (i=0; i<n; i++) x[r[i]] = tmp[i];
800 
801   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
802   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
803   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
804   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
805 
806   PetscLogFlops(2*a->nz-A->n);
807   PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
812 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
813 {
814   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
815   IS             iscol = a->col,isrow = a->row;
816   PetscErrorCode ierr;
817   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
818   PetscInt       nz,*rout,*cout,*diag = a->diag;
819   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
820 
821   PetscFunctionBegin;
822   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
823 
824   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
825   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
826   tmp = a->solve_work;
827 
828   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
829   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
830 
831   /* copy the b into temp work space according to permutation */
832   for (i=0; i<n; i++) tmp[i] = b[c[i]];
833 
834   /* forward solve the U^T */
835   for (i=0; i<n; i++) {
836     v   = aa + diag[i] ;
837     vi  = aj + diag[i] + 1;
838     nz  = ai[i+1] - diag[i] - 1;
839     tmp[i] *= *v++;
840     while (nz--) {
841       tmp[*vi++ ] -= (*v++)*tmp[i];
842     }
843   }
844 
845   /* backward solve the L^T */
846   for (i=n-1; i>=0; i--){
847     v   = aa + diag[i] - 1 ;
848     vi  = aj + diag[i] - 1 ;
849     nz  = diag[i] - ai[i];
850     while (nz--) {
851       tmp[*vi-- ] -= (*v--)*tmp[i];
852     }
853   }
854 
855   /* copy tmp into x according to permutation */
856   for (i=0; i<n; i++) x[r[i]] += tmp[i];
857 
858   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
859   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
860   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
861   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
862 
863   PetscLogFlops(2*a->nz);
864   PetscFunctionReturn(0);
865 }
866 /* ----------------------------------------------------------------*/
867 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
868 
869 #undef __FUNCT__
870 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
871 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
872 {
873   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
874   IS             isicol;
875   PetscErrorCode ierr;
876   PetscInt       *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
877   PetscInt       *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
878   PetscInt       *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
879   PetscInt       incrlev,nnz,i,levels,diagonal_fill;
880   PetscTruth     col_identity,row_identity;
881   PetscReal      f;
882 
883   PetscFunctionBegin;
884   f             = info->fill;
885   levels        = (PetscInt)info->levels;
886   diagonal_fill = (PetscInt)info->diagonal_fill;
887   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
888 
889   /* special case that simply copies fill pattern */
890   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
891   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
892   if (!levels && row_identity && col_identity) {
893     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
894     (*fact)->factor = FACTOR_LU;
895     b               = (Mat_SeqAIJ*)(*fact)->data;
896     if (!b->diag) {
897       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
898     }
899     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
900     b->row              = isrow;
901     b->col              = iscol;
902     b->icol             = isicol;
903     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
904     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
905     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
906     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
907     PetscFunctionReturn(0);
908   }
909 
910   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
911   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
912 
913   /* get new row pointers */
914   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
915   ainew[0] = 0;
916   /* don't know how many column pointers are needed so estimate */
917   jmax = (PetscInt)(f*(ai[n]+1));
918   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
919   /* ajfill is level of fill for each fill entry */
920   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr);
921   /* fill is a linked list of nonzeros in active row */
922   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
923   /* im is level for each filled value */
924   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
925   /* dloc is location of diagonal in factor */
926   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr);
927   dloc[0]  = 0;
928   for (prow=0; prow<n; prow++) {
929 
930     /* copy current row into linked list */
931     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
932     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
933     xi      = aj + ai[r[prow]];
934     fill[n] = n;
935     fill[prow] = -1; /* marker to indicate if diagonal exists */
936     while (nz--) {
937       fm  = n;
938       idx = ic[*xi++ ];
939       do {
940         m  = fm;
941         fm = fill[m];
942       } while (fm < idx);
943       fill[m]   = idx;
944       fill[idx] = fm;
945       im[idx]   = 0;
946     }
947 
948     /* make sure diagonal entry is included */
949     if (diagonal_fill && fill[prow] == -1) {
950       fm = n;
951       while (fill[fm] < prow) fm = fill[fm];
952       fill[prow] = fill[fm]; /* insert diagonal into linked list */
953       fill[fm]   = prow;
954       im[prow]   = 0;
955       nzf++;
956       dcount++;
957     }
958 
959     nzi = 0;
960     row = fill[n];
961     while (row < prow) {
962       incrlev = im[row] + 1;
963       nz      = dloc[row];
964       xi      = ajnew  + ainew[row]  + nz + 1;
965       flev    = ajfill + ainew[row]  + nz + 1;
966       nnz     = ainew[row+1] - ainew[row] - nz - 1;
967       fm      = row;
968       while (nnz-- > 0) {
969         idx = *xi++ ;
970         if (*flev + incrlev > levels) {
971           flev++;
972           continue;
973         }
974         do {
975           m  = fm;
976           fm = fill[m];
977         } while (fm < idx);
978         if (fm != idx) {
979           im[idx]   = *flev + incrlev;
980           fill[m]   = idx;
981           fill[idx] = fm;
982           fm        = idx;
983           nzf++;
984         } else {
985           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
986         }
987         flev++;
988       }
989       row = fill[row];
990       nzi++;
991     }
992     /* copy new filled row into permanent storage */
993     ainew[prow+1] = ainew[prow] + nzf;
994     if (ainew[prow+1] > jmax) {
995 
996       /* estimate how much additional space we will need */
997       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
998       /* just double the memory each time */
999       /*  maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1000       PetscInt maxadd = jmax;
1001       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1002       jmax += maxadd;
1003 
1004       /* allocate a longer ajnew and ajfill */
1005       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1006       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1007       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1008       ajnew  = xi;
1009       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1010       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1011       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1012       ajfill = xi;
1013       reallocs++; /* count how many times we realloc */
1014     }
1015     xi          = ajnew + ainew[prow] ;
1016     flev        = ajfill + ainew[prow] ;
1017     dloc[prow]  = nzi;
1018     fm          = fill[n];
1019     while (nzf--) {
1020       *xi++   = fm ;
1021       *flev++ = im[fm];
1022       fm      = fill[fm];
1023     }
1024     /* make sure row has diagonal entry */
1025     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1026       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1027     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1028     }
1029   }
1030   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1031   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1032   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1033   ierr = PetscFree(fill);CHKERRQ(ierr);
1034   ierr = PetscFree(im);CHKERRQ(ierr);
1035 
1036   {
1037     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1038     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1039     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1040     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1041     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1042     if (diagonal_fill) {
1043       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1044     }
1045   }
1046 
1047   /* put together the new matrix */
1048   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1049   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1050   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1051   PetscLogObjectParent(*fact,isicol);
1052   b = (Mat_SeqAIJ*)(*fact)->data;
1053   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1054   b->singlemalloc = PETSC_FALSE;
1055   len = (ainew[n] )*sizeof(PetscScalar);
1056   /* the next line frees the default space generated by the Create() */
1057   ierr = PetscFree(b->a);CHKERRQ(ierr);
1058   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1059   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1060   b->j          = ajnew;
1061   b->i          = ainew;
1062   for (i=0; i<n; i++) dloc[i] += ainew[i];
1063   b->diag       = dloc;
1064   b->ilen       = 0;
1065   b->imax       = 0;
1066   b->row        = isrow;
1067   b->col        = iscol;
1068   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1069   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1070   b->icol       = isicol;
1071   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1072   /* In b structure:  Free imax, ilen, old a, old j.
1073      Allocate dloc, solve_work, new a, new j */
1074   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1075   b->maxnz             = b->nz = ainew[n] ;
1076   (*fact)->factor = FACTOR_LU;
1077   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1078   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1079   (*fact)->info.factor_mallocs    = reallocs;
1080   (*fact)->info.fill_ratio_given  = f;
1081   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #include "src/mat/impls/sbaij/seq/sbaij.h"
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1088 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1089 {
1090   Mat            C = *B;
1091   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1092   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1093   IS             ip=b->row;
1094   PetscErrorCode ierr;
1095   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1096   PetscInt       *ai=a->i,*aj=a->j;
1097   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1098   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1099   PetscReal      zeropivot,rs,shiftnz;
1100   PetscTruth     shiftpd;
1101   ChShift_Ctx    sctx;
1102   PetscInt       newshift;
1103 
1104   PetscFunctionBegin;
1105   shiftnz   = info->shiftnz;
1106   shiftpd   = info->shiftpd;
1107   zeropivot = info->zeropivot;
1108 
1109   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1110 
1111   /* initialization */
1112   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1113   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1114   jl   = il + mbs;
1115   rtmp = (MatScalar*)(jl + mbs);
1116 
1117   sctx.shift_amount = 0;
1118   sctx.nshift       = 0;
1119   do {
1120     sctx.chshift = PETSC_FALSE;
1121     for (i=0; i<mbs; i++) {
1122       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1123     }
1124 
1125     for (k = 0; k<mbs; k++){
1126       bval = ba + bi[k];
1127       /* initialize k-th row by the perm[k]-th row of A */
1128       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1129       for (j = jmin; j < jmax; j++){
1130         col = rip[aj[j]];
1131         if (col >= k){ /* only take upper triangular entry */
1132           rtmp[col] = aa[j];
1133           *bval++  = 0.0; /* for in-place factorization */
1134         }
1135       }
1136       /* shift the diagonal of the matrix */
1137       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1138 
1139       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1140       dk = rtmp[k];
1141       i = jl[k]; /* first row to be added to k_th row  */
1142 
1143       while (i < k){
1144         nexti = jl[i]; /* next row to be added to k_th row */
1145 
1146         /* compute multiplier, update diag(k) and U(i,k) */
1147         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1148         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1149         dk += uikdi*ba[ili];
1150         ba[ili] = uikdi; /* -U(i,k) */
1151 
1152         /* add multiple of row i to k-th row */
1153         jmin = ili + 1; jmax = bi[i+1];
1154         if (jmin < jmax){
1155           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1156           /* update il and jl for row i */
1157           il[i] = jmin;
1158           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1159         }
1160         i = nexti;
1161       }
1162 
1163       /* shift the diagonals when zero pivot is detected */
1164       /* compute rs=sum of abs(off-diagonal) */
1165       rs   = 0.0;
1166       jmin = bi[k]+1;
1167       nz   = bi[k+1] - jmin;
1168       if (nz){
1169         bcol = bj + jmin;
1170         while (nz--){
1171           rs += PetscAbsScalar(rtmp[*bcol]);
1172           bcol++;
1173         }
1174       }
1175 
1176       sctx.rs = rs;
1177       sctx.pv = dk;
1178       ierr = Mat_CholeskyCheckShift(info,&sctx,&newshift);CHKERRQ(ierr);
1179       if (newshift == 1){
1180         break;    /* sctx.shift_amount is updated */
1181       } else if (newshift == -1){
1182         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1183       }
1184 
1185       /* copy data into U(k,:) */
1186       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1187       jmin = bi[k]+1; jmax = bi[k+1];
1188       if (jmin < jmax) {
1189         for (j=jmin; j<jmax; j++){
1190           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1191         }
1192         /* add the k-th row into il and jl */
1193         il[k] = jmin;
1194         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1195       }
1196     }
1197   } while (sctx.chshift);
1198   ierr = PetscFree(il);CHKERRQ(ierr);
1199 
1200   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1201   C->factor       = FACTOR_CHOLESKY;
1202   C->assembled    = PETSC_TRUE;
1203   C->preallocated = PETSC_TRUE;
1204   PetscLogFlops(C->m);
1205   if (sctx.nshift){
1206     if (shiftnz) {
1207       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1208     } else if (shiftpd) {
1209       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1210     }
1211   }
1212   PetscFunctionReturn(0);
1213 }
1214 
1215 #include "petscbt.h"
1216 #include "src/mat/utils/freespace.h"
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1219 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1220 {
1221   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1222   Mat_SeqSBAIJ   *b;
1223   Mat            B;
1224   PetscErrorCode ierr;
1225   PetscTruth     perm_identity;
1226   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1227   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1228   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1229   PetscInt       ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1230   PetscReal      fill=info->fill,levels=info->levels;
1231   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1232   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1233   PetscBT        lnkbt;
1234 
1235   PetscFunctionBegin;
1236   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1237   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1238 
1239   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1240   ui[0] = 0;
1241 
1242   /* special case that simply copies fill pattern */
1243   if (!levels && perm_identity) {
1244     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1245     for (i=0; i<am; i++) {
1246       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1247     }
1248     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1249     cols = uj;
1250     for (i=0; i<am; i++) {
1251       aj    = a->j + a->diag[i];
1252       ncols = ui[i+1] - ui[i];
1253       for (j=0; j<ncols; j++) *cols++ = *aj++;
1254     }
1255   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1256     /* initialization */
1257     ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
1258 
1259     /* jl: linked list for storing indices of the pivot rows
1260        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1261     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1262     il         = jl + am;
1263     uj_ptr     = (PetscInt**)(il + am);
1264     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1265     for (i=0; i<am; i++){
1266       jl[i] = am; il[i] = 0;
1267     }
1268 
1269     /* create and initialize a linked list for storing column indices of the active row k */
1270     nlnk = am + 1;
1271     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1272 
1273     /* initial FreeSpace size is fill*(ai[am]+1) */
1274     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1275     current_space = free_space;
1276     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1277     current_space_lvl = free_space_lvl;
1278 
1279     for (k=0; k<am; k++){  /* for each active row k */
1280       /* initialize lnk by the column indices of row rip[k] of A */
1281       nzk   = 0;
1282       ncols = ai[rip[k]+1] - ai[rip[k]];
1283       ncols_upper = 0;
1284       cols        = cols_lvl + am;
1285       for (j=0; j<ncols; j++){
1286         i = rip[*(aj + ai[rip[k]] + j)];
1287         if (i >= k){ /* only take upper triangular entry */
1288           cols[ncols_upper] = i;
1289           cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
1290           ncols_upper++;
1291         }
1292       }
1293       ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1294       nzk += nlnk;
1295 
1296       /* update lnk by computing fill-in for each pivot row to be merged in */
1297       prow = jl[k]; /* 1st pivot row */
1298 
1299       while (prow < k){
1300         nextprow = jl[prow];
1301 
1302         /* merge prow into k-th row */
1303         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1304         jmax = ui[prow+1];
1305         ncols = jmax-jmin;
1306         i     = jmin - ui[prow];
1307         cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1308         for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1309         ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1310         nzk += nlnk;
1311 
1312         /* update il and jl for prow */
1313         if (jmin < jmax){
1314           il[prow] = jmin;
1315           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1316         }
1317         prow = nextprow;
1318       }
1319 
1320       /* if free space is not available, make more free space */
1321       if (current_space->local_remaining<nzk) {
1322         i = am - k + 1; /* num of unfactored rows */
1323         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1324         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1325         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1326         reallocs++;
1327       }
1328 
1329       /* copy data into free_space and free_space_lvl, then initialize lnk */
1330       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1331 
1332       /* add the k-th row into il and jl */
1333       if (nzk > 1){
1334         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1335         jl[k] = jl[i]; jl[i] = k;
1336         il[k] = ui[k] + 1;
1337       }
1338       uj_ptr[k]     = current_space->array;
1339       uj_lvl_ptr[k] = current_space_lvl->array;
1340 
1341       current_space->array           += nzk;
1342       current_space->local_used      += nzk;
1343       current_space->local_remaining -= nzk;
1344 
1345       current_space_lvl->array           += nzk;
1346       current_space_lvl->local_used      += nzk;
1347       current_space_lvl->local_remaining -= nzk;
1348 
1349       ui[k+1] = ui[k] + nzk;
1350     }
1351 
1352     if (ai[am] != 0) {
1353       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1354       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1355       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1356       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1357     } else {
1358       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1359     }
1360 
1361     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1362     ierr = PetscFree(jl);CHKERRQ(ierr);
1363     ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
1364 
1365     /* destroy list of free space and other temporary array(s) */
1366     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1367     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1368     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1369     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1370 
1371   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1372 
1373   /* put together the new matrix in MATSEQSBAIJ format */
1374   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1375   B = *fact;
1376   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1377   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1378 
1379   b    = (Mat_SeqSBAIJ*)B->data;
1380   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1381   b->singlemalloc = PETSC_FALSE;
1382   /* the next line frees the default space generated by the Create() */
1383   ierr = PetscFree(b->a);CHKERRQ(ierr);
1384   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1385   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1386   b->j    = uj;
1387   b->i    = ui;
1388   b->diag = 0;
1389   b->ilen = 0;
1390   b->imax = 0;
1391   b->row  = perm;
1392   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1393   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1394   b->icol = perm;
1395   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1396   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1397   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1398   b->maxnz = b->nz = ui[am];
1399 
1400   B->factor                 = FACTOR_CHOLESKY;
1401   B->info.factor_mallocs    = reallocs;
1402   B->info.fill_ratio_given  = fill;
1403   if (ai[am] != 0) {
1404     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1405   } else {
1406     B->info.fill_ratio_needed = 0.0;
1407   }
1408   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1409   if (perm_identity){
1410     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1411     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1412   }
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1418 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1419 {
1420   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1421   Mat_SeqSBAIJ   *b;
1422   Mat            B;
1423   PetscErrorCode ierr;
1424   PetscTruth     perm_identity;
1425   PetscReal      fill = info->fill;
1426   PetscInt       *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1427   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1428   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1429   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1430   PetscBT        lnkbt;
1431   IS             iperm;
1432 
1433   PetscFunctionBegin;
1434   /* check whether perm is the identity mapping */
1435   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1436   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1437 
1438   if (!perm_identity){
1439     /* check if perm is symmetric! */
1440     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1441     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1442     for (i=0; i<mbs; i++) {
1443       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1444     }
1445     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1446     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1447   }
1448 
1449   /* initialization */
1450   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1451   ui[0] = 0;
1452 
1453   /* jl: linked list for storing indices of the pivot rows
1454      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
1455   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1456   il     = jl + mbs;
1457   cols   = il + mbs;
1458   ui_ptr = (PetscInt**)(cols + mbs);
1459   for (i=0; i<mbs; i++){
1460     jl[i] = mbs; il[i] = 0;
1461   }
1462 
1463   /* create and initialize a linked list for storing column indices of the active row k */
1464   nlnk = mbs + 1;
1465   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1466 
1467   /* initial FreeSpace size is fill*(ai[mbs]+1) */
1468   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
1469   current_space = free_space;
1470 
1471   for (k=0; k<mbs; k++){  /* for each active row k */
1472     /* initialize lnk by the column indices of row rip[k] of A */
1473     nzk   = 0;
1474     ncols = ai[rip[k]+1] - ai[rip[k]];
1475     ncols_upper = 0;
1476     for (j=0; j<ncols; j++){
1477       i = rip[*(aj + ai[rip[k]] + j)];
1478       if (i >= k){ /* only take upper triangular entry */
1479         cols[ncols_upper] = i;
1480         ncols_upper++;
1481       }
1482     }
1483     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1484     nzk += nlnk;
1485 
1486     /* update lnk by computing fill-in for each pivot row to be merged in */
1487     prow = jl[k]; /* 1st pivot row */
1488 
1489     while (prow < k){
1490       nextprow = jl[prow];
1491       /* merge prow into k-th row */
1492       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
1493       jmax = ui[prow+1];
1494       ncols = jmax-jmin;
1495       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
1496       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1497       nzk += nlnk;
1498 
1499       /* update il and jl for prow */
1500       if (jmin < jmax){
1501         il[prow] = jmin;
1502         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1503       }
1504       prow = nextprow;
1505     }
1506 
1507     /* if free space is not available, make more free space */
1508     if (current_space->local_remaining<nzk) {
1509       i = mbs - k + 1; /* num of unfactored rows */
1510       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1511       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1512       reallocs++;
1513     }
1514 
1515     /* copy data into free space, then initialize lnk */
1516     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1517 
1518     /* add the k-th row into il and jl */
1519     if (nzk-1 > 0){
1520       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1521       jl[k] = jl[i]; jl[i] = k;
1522       il[k] = ui[k] + 1;
1523     }
1524     ui_ptr[k] = current_space->array;
1525     current_space->array           += nzk;
1526     current_space->local_used      += nzk;
1527     current_space->local_remaining -= nzk;
1528 
1529     ui[k+1] = ui[k] + nzk;
1530   }
1531 
1532   if (ai[mbs] != 0) {
1533     PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]);
1534     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1535     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1536     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1537   } else {
1538      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1539   }
1540 
1541   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1542   ierr = PetscFree(jl);CHKERRQ(ierr);
1543 
1544   /* destroy list of free space and other temporary array(s) */
1545   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1546   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1547   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1548 
1549   /* put together the new matrix in MATSEQSBAIJ format */
1550   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
1551   B    = *fact;
1552   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1553   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1554 
1555   b = (Mat_SeqSBAIJ*)B->data;
1556   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1557   b->singlemalloc = PETSC_FALSE;
1558   /* the next line frees the default space generated by the Create() */
1559   ierr = PetscFree(b->a);CHKERRQ(ierr);
1560   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1561   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1562   b->j    = uj;
1563   b->i    = ui;
1564   b->diag = 0;
1565   b->ilen = 0;
1566   b->imax = 0;
1567   b->row  = perm;
1568   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1569   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1570   b->icol = perm;
1571   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1572   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1573   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
1574   b->maxnz = b->nz = ui[mbs];
1575 
1576   B->factor                 = FACTOR_CHOLESKY;
1577   B->info.factor_mallocs    = reallocs;
1578   B->info.fill_ratio_given  = fill;
1579   if (ai[mbs] != 0) {
1580     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
1581   } else {
1582     B->info.fill_ratio_needed = 0.0;
1583   }
1584   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1585   if (perm_identity){
1586     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1587     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1588   }
1589   PetscFunctionReturn(0);
1590 }
1591