xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision d2276718d84d75a9e894608700f4d0f88a5f11d7)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9 {
10   PetscFunctionBegin;
11 
12   SETERRQ(PETSC_ERR_SUP,"Code not written");
13 #if !defined(PETSC_USE_DEBUG)
14   PetscFunctionReturn(0);
15 #endif
16 }
17 
18 
19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
21 
22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
28   /* ------------------------------------------------------------
29 
30           This interface was contribed by Tony Caola
31 
32      This routine is an interface to the pivoting drop-tolerance
33      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34      SPARSEKIT2.
35 
36      The SPARSEKIT2 routines used here are covered by the GNU
37      copyright; see the file gnu in this directory.
38 
39      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40      help in getting this routine ironed out.
41 
42      The major drawback to this routine is that if info->fill is
43      not large enough it fails rather than allocating more space;
44      this can be fixed by hacking/improving the f2c version of
45      Yousef Saad's code.
46 
47      ------------------------------------------------------------
48 */
49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50 {
51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
52   PetscFunctionBegin;
53   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
54   You can obtain the drop tolerance routines by installing PETSc from\n\
55   www.mcs.anl.gov/petsc\n");
56 #else
57   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
58   IS             iscolf,isicol,isirow;
59   PetscTruth     reorder;
60   PetscErrorCode ierr,sierr;
61   PetscInt       *c,*r,*ic,i,n = A->m;
62   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63   PetscInt       *ordcol,*iwk,*iperm,*jw;
64   PetscInt       jmax,lfill,job,*o_i,*o_j;
65   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66   PetscReal      af;
67 
68   PetscFunctionBegin;
69 
70   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
71   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
72   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
73   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
74   lfill   = (PetscInt)(info->dtcount/2.0);
75   jmax    = (PetscInt)(info->fill*a->nz);
76 
77 
78   /* ------------------------------------------------------------
79      If reorder=.TRUE., then the original matrix has to be
80      reordered to reflect the user selected ordering scheme, and
81      then de-reordered so it is in it's original format.
82      Because Saad's dperm() is NOT in place, we have to copy
83      the original matrix and allocate more storage. . .
84      ------------------------------------------------------------
85   */
86 
87   /* set reorder to true if either isrow or iscol is not identity */
88   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
89   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
90   reorder = PetscNot(reorder);
91 
92 
93   /* storage for ilu factor */
94   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
95   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
97   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
98 
99   /* ------------------------------------------------------------
100      Make sure that everything is Fortran formatted (1-Based)
101      ------------------------------------------------------------
102   */
103   for (i=old_i[0];i<old_i[n];i++) {
104     old_j[i]++;
105   }
106   for(i=0;i<n+1;i++) {
107     old_i[i]++;
108   };
109 
110 
111   if (reorder) {
112     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
113     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
114     for(i=0;i<n;i++) {
115       r[i]  = r[i]+1;
116       c[i]  = c[i]+1;
117     }
118     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
119     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
121     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
122     for (i=0;i<n;i++) {
123       r[i]  = r[i]-1;
124       c[i]  = c[i]-1;
125     }
126     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
127     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
128     o_a = old_a2;
129     o_j = old_j2;
130     o_i = old_i2;
131   } else {
132     o_a = old_a;
133     o_j = old_j;
134     o_i = old_i;
135   }
136 
137   /* ------------------------------------------------------------
138      Call Saad's ilutp() routine to generate the factorization
139      ------------------------------------------------------------
140   */
141 
142   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
144   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
145 
146   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
147   if (sierr) {
148     switch (sierr) {
149       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
150       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
152       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
153       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
154       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
155     }
156   }
157 
158   ierr = PetscFree(w);CHKERRQ(ierr);
159   ierr = PetscFree(jw);CHKERRQ(ierr);
160 
161   /* ------------------------------------------------------------
162      Saad's routine gives the result in Modified Sparse Row (msr)
163      Convert to Compressed Sparse Row format (csr)
164      ------------------------------------------------------------
165   */
166 
167   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
168   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
169 
170   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
171 
172   ierr = PetscFree(iwk);CHKERRQ(ierr);
173   ierr = PetscFree(wk);CHKERRQ(ierr);
174 
175   if (reorder) {
176     ierr = PetscFree(old_a2);CHKERRQ(ierr);
177     ierr = PetscFree(old_j2);CHKERRQ(ierr);
178     ierr = PetscFree(old_i2);CHKERRQ(ierr);
179   } else {
180     /* fix permutation of old_j that the factorization introduced */
181     for (i=old_i[0]; i<old_i[n]; i++) {
182       old_j[i-1] = iperm[old_j[i-1]-1];
183     }
184   }
185 
186   /* get rid of the shift to indices starting at 1 */
187   for (i=0; i<n+1; i++) {
188     old_i[i]--;
189   }
190   for (i=old_i[0];i<old_i[n];i++) {
191     old_j[i]--;
192   }
193 
194   /* Make the factored matrix 0-based */
195   for (i=0; i<n+1; i++) {
196     new_i[i]--;
197   }
198   for (i=new_i[0];i<new_i[n];i++) {
199     new_j[i]--;
200   }
201 
202   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203   /*-- permute the right-hand-side and solution vectors           --*/
204   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
205   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
206   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
207   for(i=0; i<n; i++) {
208     ordcol[i] = ic[iperm[i]-1];
209   };
210   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
211   ierr = ISDestroy(isicol);CHKERRQ(ierr);
212 
213   ierr = PetscFree(iperm);CHKERRQ(ierr);
214 
215   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
216   ierr = PetscFree(ordcol);CHKERRQ(ierr);
217 
218   /*----- put together the new matrix -----*/
219 
220   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
221   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
222   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
223   (*fact)->factor    = FACTOR_LU;
224   (*fact)->assembled = PETSC_TRUE;
225 
226   b = (Mat_SeqAIJ*)(*fact)->data;
227   ierr = PetscFree(b->imax);CHKERRQ(ierr);
228   b->sorted        = PETSC_FALSE;
229   b->singlemalloc  = PETSC_FALSE;
230   /* the next line frees the default space generated by the MatCreate() */
231   ierr             = PetscFree(b->a);CHKERRQ(ierr);
232   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
247 
248   /* check out for identical nodes. If found, use inode functions */
249   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
250 
251   af = ((double)b->nz)/((double)a->nz) + .001;
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 /*
262     Factorization code for AIJ format.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267 {
268   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
269   IS             isicol;
270   PetscErrorCode ierr;
271   PetscInt       *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
272   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
274   PetscReal      f;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
284   ainew[0] = 0;
285   /* don't know how many column pointers are needed so estimate */
286   f = info->fill;
287   jmax  = (PetscInt)(f*ai[n]+1);
288   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
289   /* fill is a linked list of nonzeros in active row */
290   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
291   im   = fill + n + 1;
292   /* idnew is location of diagonal in factor */
293   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
294   idnew[0] = 0;
295 
296   for (i=0; i<n; i++) {
297     /* first copy previous fill into linked list */
298     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
299     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300     ajtmp   = aj + ai[r[i]];
301     fill[n] = n;
302     while (nz--) {
303       fm  = n;
304       idx = ic[*ajtmp++];
305       do {
306         m  = fm;
307         fm = fill[m];
308       } while (fm < idx);
309       fill[m]   = idx;
310       fill[idx] = fm;
311     }
312     row = fill[n];
313     while (row < i) {
314       ajtmp = ajnew + idnew[row] + 1;
315       nzbd  = 1 + idnew[row] - ainew[row];
316       nz    = im[row] - nzbd;
317       fm    = row;
318       while (nz-- > 0) {
319         idx = *ajtmp++ ;
320         nzbd++;
321         if (idx == i) im[row] = nzbd;
322         do {
323           m  = fm;
324           fm = fill[m];
325         } while (fm < idx);
326         if (fm != idx) {
327           fill[m]   = idx;
328           fill[idx] = fm;
329           fm        = idx;
330           nnz++;
331         }
332       }
333       row = fill[row];
334     }
335     /* copy new filled row into permanent storage */
336     ainew[i+1] = ainew[i] + nnz;
337     if (ainew[i+1] > jmax) {
338       /* estimate how much additional space we will need */
339       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
340       /* just double the memory each time */
341       PetscInt maxadd = jmax;
342       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
343       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
344       jmax += maxadd;
345 
346       /* allocate a longer ajnew */
347       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
348       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr);
349       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
350       ajnew = ajtmp;
351       reallocs++; /* count how many times we realloc */
352     }
353     ajtmp = ajnew + ainew[i];
354     fm    = fill[n];
355     nzi   = 0;
356     im[i] = nnz;
357     while (nnz--) {
358       if (fm < i) nzi++;
359       *ajtmp++ = fm ;
360       fm       = fill[fm];
361     }
362     idnew[i] = ainew[i] + nzi;
363   }
364   if (ai[n] != 0) {
365     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
366     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
367     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
368     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
369     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
370   } else {
371     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
372   }
373 
374   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
375   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
376 
377   ierr = PetscFree(fill);CHKERRQ(ierr);
378 
379   /* put together the new matrix */
380   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
381   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
382   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
383   PetscLogObjectParent(*B,isicol);
384   b    = (Mat_SeqAIJ*)(*B)->data;
385   ierr = PetscFree(b->imax);CHKERRQ(ierr);
386   b->singlemalloc = PETSC_FALSE;
387   /* the next line frees the default space generated by the Create() */
388   ierr = PetscFree(b->a);CHKERRQ(ierr);
389   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
390   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
391   b->j          = ajnew;
392   b->i          = ainew;
393   b->diag       = idnew;
394   b->ilen       = 0;
395   b->imax       = 0;
396   b->row        = isrow;
397   b->col        = iscol;
398   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
399   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
400   b->icol       = isicol;
401   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
402   /* In b structure:  Free imax, ilen, old a, old j.
403      Allocate idnew, solve_work, new a, new j */
404   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
405   b->maxnz = b->nz = ainew[n] ;
406 
407   (*B)->factor                 =  FACTOR_LU;
408   (*B)->info.factor_mallocs    = reallocs;
409   (*B)->info.fill_ratio_given  = f;
410   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
411   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
412 
413   if (ai[n] != 0) {
414     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
415   } else {
416     (*B)->info.fill_ratio_needed = 0.0;
417   }
418   PetscFunctionReturn(0);
419 }
420 /* ----------------------------------------------------------- */
421 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
422 
423 #include "src/ksp/pc/impls/factor/factor.h"
424 #undef __FUNCT__
425 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
426 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
427 {
428   Mat            C=*B;
429   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
430   IS             isrow = b->row,isicol = b->icol;
431   PetscErrorCode ierr;
432   PetscInt       *r,*ic,i,j,n=A->m,*ai=b->i,*aj=b->j;
433   PetscInt       *ajtmpold,*ajtmp,nz,row,*ics;
434   PetscInt       *diag_offset = b->diag,diag,*pj;
435   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
436   PetscReal      zeropivot,rs,d,shift_nonzero;
437   PetscReal      row_shift,shift_top=0.;
438   PetscTruth     shift_pd;
439   Shift_Ctx      sctx;
440   PetscInt       newshift;
441 
442   PetscFunctionBegin;
443   shift_nonzero  = info->shiftnz;
444   shift_pd       = info->shiftpd;
445   zeropivot      = info->zeropivot;
446 
447   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
448   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
449   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
450   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
451   rtmps = rtmp; ics = ic;
452 
453   if (!a->diag) {
454     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
455   }
456   /* if both shift schemes are chosen by user, only use shift_pd */
457   if (shift_pd && shift_nonzero) shift_nonzero = 0.0;
458   if (shift_pd) { /* set shift_top=max{row_shift} */
459     PetscInt *aai = a->i,*ddiag = a->diag;
460     shift_top = 0;
461     for (i=0; i<n; i++) {
462       d = PetscAbsScalar((a->a)[ddiag[i]]);
463       /* calculate amt of shift needed for this row */
464       if (d<=0) {
465         row_shift = 0;
466       } else {
467         row_shift = -2*d;
468       }
469       v  = a->a+aai[i];
470       nz = aai[i+1] - aai[i];
471       for (j=0; j<nz; j++)
472 	row_shift += PetscAbsScalar(v[j]);
473       if (row_shift>shift_top) shift_top = row_shift;
474     }
475   }
476 
477   sctx.shift_amount = 0;
478   sctx.shift_top    = shift_top;
479   sctx.nshift       = 0;
480   sctx.nshift_max   = 5;
481   sctx.shift_lo     = 0.;
482   sctx.shift_hi     = 1.;
483   do {
484     sctx.lushift = PETSC_FALSE;
485     for (i=0; i<n; i++){
486       nz    = ai[i+1] - ai[i];
487       ajtmp = aj + ai[i];
488       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
489 
490       /* load in initial (unfactored row) */
491       nz       = a->i[r[i]+1] - a->i[r[i]];
492       ajtmpold = a->j + a->i[r[i]];
493       v        = a->a + a->i[r[i]];
494       for (j=0; j<nz; j++) {
495         rtmp[ics[ajtmpold[j]]] = v[j];
496       }
497       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
498 
499       row = *ajtmp++;
500       while  (row < i) {
501         pc = rtmp + row;
502         if (*pc != 0.0) {
503           pv         = b->a + diag_offset[row];
504           pj         = b->j + diag_offset[row] + 1;
505           multiplier = *pc / *pv++;
506           *pc        = multiplier;
507           nz         = ai[row+1] - diag_offset[row] - 1;
508           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
509           PetscLogFlops(2*nz);
510         }
511         row = *ajtmp++;
512       }
513       /* finished row so stick it into b->a */
514       pv   = b->a + ai[i] ;
515       pj   = b->j + ai[i] ;
516       nz   = ai[i+1] - ai[i];
517       diag = diag_offset[i] - ai[i];
518       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
519       rs   = 0.0;
520       for (j=0; j<nz; j++) {
521         pv[j] = rtmps[pj[j]];
522         if (j != diag) rs += PetscAbsScalar(pv[j]);
523       }
524 
525       sctx.rs  = rs;
526       sctx.pv  = pv[diag];
527       newshift = 0;
528       ierr = PCLUFactorCheckShift(A,info,B,&sctx,&newshift);CHKERRQ(ierr);
529       if (newshift == 1){
530         break;
531       } else if (newshift == -1){
532         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
533       }
534     }
535 
536     if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
537       /*
538        * if no shift in this attempt & shifting & started shifting & can refine,
539        * then try lower shift
540        */
541       sctx.shift_hi       = info->shift_fraction;
542       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
543       sctx.shift_amount   = info->shift_fraction * sctx.shift_top;
544       sctx.lushift        = PETSC_TRUE;
545       sctx.nshift++;
546     }
547   } while (sctx.lushift);
548 
549   /* invert diagonal entries for simplier triangular solves */
550   for (i=0; i<n; i++) {
551     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
552   }
553 
554   ierr = PetscFree(rtmp);CHKERRQ(ierr);
555   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
556   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
557   C->factor = FACTOR_LU;
558   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
559   C->assembled = PETSC_TRUE;
560   PetscLogFlops(C->n);
561   if (sctx.nshift){
562     if (shift_nonzero) {
563       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
564     } else if (shift_pd) {
565       b->lu_shift_fraction = info->shift_fraction;
566       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift);
567     }
568   }
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
574 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
575 {
576   PetscFunctionBegin;
577   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
578   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
579   PetscFunctionReturn(0);
580 }
581 
582 
583 /* ----------------------------------------------------------- */
584 #undef __FUNCT__
585 #define __FUNCT__ "MatLUFactor_SeqAIJ"
586 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
587 {
588   PetscErrorCode ierr;
589   Mat            C;
590 
591   PetscFunctionBegin;
592   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
593   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
594   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
595   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
596   PetscFunctionReturn(0);
597 }
598 /* ----------------------------------------------------------- */
599 #undef __FUNCT__
600 #define __FUNCT__ "MatSolve_SeqAIJ"
601 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
602 {
603   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
604   IS             iscol = a->col,isrow = a->row;
605   PetscErrorCode ierr;
606   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
607   PetscInt       nz,*rout,*cout;
608   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
609 
610   PetscFunctionBegin;
611   if (!n) PetscFunctionReturn(0);
612 
613   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
614   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
615   tmp  = a->solve_work;
616 
617   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
618   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
619 
620   /* forward solve the lower triangular */
621   tmp[0] = b[*r++];
622   tmps   = tmp;
623   for (i=1; i<n; i++) {
624     v   = aa + ai[i] ;
625     vi  = aj + ai[i] ;
626     nz  = a->diag[i] - ai[i];
627     sum = b[*r++];
628     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
629     tmp[i] = sum;
630   }
631 
632   /* backward solve the upper triangular */
633   for (i=n-1; i>=0; i--){
634     v   = aa + a->diag[i] + 1;
635     vi  = aj + a->diag[i] + 1;
636     nz  = ai[i+1] - a->diag[i] - 1;
637     sum = tmp[i];
638     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
639     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
640   }
641 
642   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
643   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
644   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
645   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
646   PetscLogFlops(2*a->nz - A->n);
647   PetscFunctionReturn(0);
648 }
649 
650 /* ----------------------------------------------------------- */
651 #undef __FUNCT__
652 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
653 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
654 {
655   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
656   PetscErrorCode ierr;
657   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
658   PetscScalar    *x,*b,*aa = a->a;
659 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
660   PetscInt       adiag_i,i,*vi,nz,ai_i;
661   PetscScalar    *v,sum;
662 #endif
663 
664   PetscFunctionBegin;
665   if (!n) PetscFunctionReturn(0);
666 
667   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
668   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
669 
670 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
671   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
672 #else
673   /* forward solve the lower triangular */
674   x[0] = b[0];
675   for (i=1; i<n; i++) {
676     ai_i = ai[i];
677     v    = aa + ai_i;
678     vi   = aj + ai_i;
679     nz   = adiag[i] - ai_i;
680     sum  = b[i];
681     while (nz--) sum -= *v++ * x[*vi++];
682     x[i] = sum;
683   }
684 
685   /* backward solve the upper triangular */
686   for (i=n-1; i>=0; i--){
687     adiag_i = adiag[i];
688     v       = aa + adiag_i + 1;
689     vi      = aj + adiag_i + 1;
690     nz      = ai[i+1] - adiag_i - 1;
691     sum     = x[i];
692     while (nz--) sum -= *v++ * x[*vi++];
693     x[i]    = sum*aa[adiag_i];
694   }
695 #endif
696   PetscLogFlops(2*a->nz - A->n);
697   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
698   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
699   PetscFunctionReturn(0);
700 }
701 
702 #undef __FUNCT__
703 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
704 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
705 {
706   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
707   IS             iscol = a->col,isrow = a->row;
708   PetscErrorCode ierr;
709   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
710   PetscInt       nz,*rout,*cout;
711   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
712 
713   PetscFunctionBegin;
714   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
715 
716   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
717   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
718   tmp  = a->solve_work;
719 
720   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
721   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
722 
723   /* forward solve the lower triangular */
724   tmp[0] = b[*r++];
725   for (i=1; i<n; i++) {
726     v   = aa + ai[i] ;
727     vi  = aj + ai[i] ;
728     nz  = a->diag[i] - ai[i];
729     sum = b[*r++];
730     while (nz--) sum -= *v++ * tmp[*vi++ ];
731     tmp[i] = sum;
732   }
733 
734   /* backward solve the upper triangular */
735   for (i=n-1; i>=0; i--){
736     v   = aa + a->diag[i] + 1;
737     vi  = aj + a->diag[i] + 1;
738     nz  = ai[i+1] - a->diag[i] - 1;
739     sum = tmp[i];
740     while (nz--) sum -= *v++ * tmp[*vi++ ];
741     tmp[i] = sum*aa[a->diag[i]];
742     x[*c--] += tmp[i];
743   }
744 
745   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
746   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
747   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
748   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
749   PetscLogFlops(2*a->nz);
750 
751   PetscFunctionReturn(0);
752 }
753 /* -------------------------------------------------------------------*/
754 #undef __FUNCT__
755 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
756 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
757 {
758   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
759   IS             iscol = a->col,isrow = a->row;
760   PetscErrorCode ierr;
761   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
762   PetscInt       nz,*rout,*cout,*diag = a->diag;
763   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
764 
765   PetscFunctionBegin;
766   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
767   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
768   tmp  = a->solve_work;
769 
770   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
771   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
772 
773   /* copy the b into temp work space according to permutation */
774   for (i=0; i<n; i++) tmp[i] = b[c[i]];
775 
776   /* forward solve the U^T */
777   for (i=0; i<n; i++) {
778     v   = aa + diag[i] ;
779     vi  = aj + diag[i] + 1;
780     nz  = ai[i+1] - diag[i] - 1;
781     s1  = tmp[i];
782     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
783     while (nz--) {
784       tmp[*vi++ ] -= (*v++)*s1;
785     }
786     tmp[i] = s1;
787   }
788 
789   /* backward solve the L^T */
790   for (i=n-1; i>=0; i--){
791     v   = aa + diag[i] - 1 ;
792     vi  = aj + diag[i] - 1 ;
793     nz  = diag[i] - ai[i];
794     s1  = tmp[i];
795     while (nz--) {
796       tmp[*vi-- ] -= (*v--)*s1;
797     }
798   }
799 
800   /* copy tmp into x according to permutation */
801   for (i=0; i<n; i++) x[r[i]] = tmp[i];
802 
803   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
804   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
805   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
806   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
807 
808   PetscLogFlops(2*a->nz-A->n);
809   PetscFunctionReturn(0);
810 }
811 
812 #undef __FUNCT__
813 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
814 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
815 {
816   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
817   IS             iscol = a->col,isrow = a->row;
818   PetscErrorCode ierr;
819   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
820   PetscInt       nz,*rout,*cout,*diag = a->diag;
821   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
822 
823   PetscFunctionBegin;
824   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
825 
826   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
827   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
828   tmp = a->solve_work;
829 
830   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
831   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
832 
833   /* copy the b into temp work space according to permutation */
834   for (i=0; i<n; i++) tmp[i] = b[c[i]];
835 
836   /* forward solve the U^T */
837   for (i=0; i<n; i++) {
838     v   = aa + diag[i] ;
839     vi  = aj + diag[i] + 1;
840     nz  = ai[i+1] - diag[i] - 1;
841     tmp[i] *= *v++;
842     while (nz--) {
843       tmp[*vi++ ] -= (*v++)*tmp[i];
844     }
845   }
846 
847   /* backward solve the L^T */
848   for (i=n-1; i>=0; i--){
849     v   = aa + diag[i] - 1 ;
850     vi  = aj + diag[i] - 1 ;
851     nz  = diag[i] - ai[i];
852     while (nz--) {
853       tmp[*vi-- ] -= (*v--)*tmp[i];
854     }
855   }
856 
857   /* copy tmp into x according to permutation */
858   for (i=0; i<n; i++) x[r[i]] += tmp[i];
859 
860   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
861   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
862   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
863   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
864 
865   PetscLogFlops(2*a->nz);
866   PetscFunctionReturn(0);
867 }
868 /* ----------------------------------------------------------------*/
869 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
870 
871 #undef __FUNCT__
872 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
873 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
874 {
875   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
876   IS             isicol;
877   PetscErrorCode ierr;
878   PetscInt       *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
879   PetscInt       *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
880   PetscInt       *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
881   PetscInt       incrlev,nnz,i,levels,diagonal_fill;
882   PetscTruth     col_identity,row_identity;
883   PetscReal      f;
884 
885   PetscFunctionBegin;
886   f             = info->fill;
887   levels        = (PetscInt)info->levels;
888   diagonal_fill = (PetscInt)info->diagonal_fill;
889   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
890 
891   /* special case that simply copies fill pattern */
892   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
893   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
894   if (!levels && row_identity && col_identity) {
895     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
896     (*fact)->factor = FACTOR_LU;
897     b               = (Mat_SeqAIJ*)(*fact)->data;
898     if (!b->diag) {
899       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
900     }
901     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
902     b->row              = isrow;
903     b->col              = iscol;
904     b->icol             = isicol;
905     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
906     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
907     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
908     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
909     PetscFunctionReturn(0);
910   }
911 
912   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
913   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
914 
915   /* get new row pointers */
916   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
917   ainew[0] = 0;
918   /* don't know how many column pointers are needed so estimate */
919   jmax = (PetscInt)(f*(ai[n]+1));
920   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
921   /* ajfill is level of fill for each fill entry */
922   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr);
923   /* fill is a linked list of nonzeros in active row */
924   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
925   /* im is level for each filled value */
926   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
927   /* dloc is location of diagonal in factor */
928   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr);
929   dloc[0]  = 0;
930   for (prow=0; prow<n; prow++) {
931 
932     /* copy current row into linked list */
933     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
934     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
935     xi      = aj + ai[r[prow]];
936     fill[n] = n;
937     fill[prow] = -1; /* marker to indicate if diagonal exists */
938     while (nz--) {
939       fm  = n;
940       idx = ic[*xi++ ];
941       do {
942         m  = fm;
943         fm = fill[m];
944       } while (fm < idx);
945       fill[m]   = idx;
946       fill[idx] = fm;
947       im[idx]   = 0;
948     }
949 
950     /* make sure diagonal entry is included */
951     if (diagonal_fill && fill[prow] == -1) {
952       fm = n;
953       while (fill[fm] < prow) fm = fill[fm];
954       fill[prow] = fill[fm]; /* insert diagonal into linked list */
955       fill[fm]   = prow;
956       im[prow]   = 0;
957       nzf++;
958       dcount++;
959     }
960 
961     nzi = 0;
962     row = fill[n];
963     while (row < prow) {
964       incrlev = im[row] + 1;
965       nz      = dloc[row];
966       xi      = ajnew  + ainew[row]  + nz + 1;
967       flev    = ajfill + ainew[row]  + nz + 1;
968       nnz     = ainew[row+1] - ainew[row] - nz - 1;
969       fm      = row;
970       while (nnz-- > 0) {
971         idx = *xi++ ;
972         if (*flev + incrlev > levels) {
973           flev++;
974           continue;
975         }
976         do {
977           m  = fm;
978           fm = fill[m];
979         } while (fm < idx);
980         if (fm != idx) {
981           im[idx]   = *flev + incrlev;
982           fill[m]   = idx;
983           fill[idx] = fm;
984           fm        = idx;
985           nzf++;
986         } else {
987           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
988         }
989         flev++;
990       }
991       row = fill[row];
992       nzi++;
993     }
994     /* copy new filled row into permanent storage */
995     ainew[prow+1] = ainew[prow] + nzf;
996     if (ainew[prow+1] > jmax) {
997 
998       /* estimate how much additional space we will need */
999       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1000       /* just double the memory each time */
1001       /*  maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1002       PetscInt maxadd = jmax;
1003       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1004       jmax += maxadd;
1005 
1006       /* allocate a longer ajnew and ajfill */
1007       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1008       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1009       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1010       ajnew  = xi;
1011       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1012       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1013       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1014       ajfill = xi;
1015       reallocs++; /* count how many times we realloc */
1016     }
1017     xi          = ajnew + ainew[prow] ;
1018     flev        = ajfill + ainew[prow] ;
1019     dloc[prow]  = nzi;
1020     fm          = fill[n];
1021     while (nzf--) {
1022       *xi++   = fm ;
1023       *flev++ = im[fm];
1024       fm      = fill[fm];
1025     }
1026     /* make sure row has diagonal entry */
1027     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1028       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1029     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1030     }
1031   }
1032   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1033   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1034   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1035   ierr = PetscFree(fill);CHKERRQ(ierr);
1036   ierr = PetscFree(im);CHKERRQ(ierr);
1037 
1038   {
1039     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1040     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1041     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1042     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1043     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1044     if (diagonal_fill) {
1045       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1046     }
1047   }
1048 
1049   /* put together the new matrix */
1050   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1051   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1052   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1053   PetscLogObjectParent(*fact,isicol);
1054   b = (Mat_SeqAIJ*)(*fact)->data;
1055   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1056   b->singlemalloc = PETSC_FALSE;
1057   len = (ainew[n] )*sizeof(PetscScalar);
1058   /* the next line frees the default space generated by the Create() */
1059   ierr = PetscFree(b->a);CHKERRQ(ierr);
1060   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1061   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1062   b->j          = ajnew;
1063   b->i          = ainew;
1064   for (i=0; i<n; i++) dloc[i] += ainew[i];
1065   b->diag       = dloc;
1066   b->ilen       = 0;
1067   b->imax       = 0;
1068   b->row        = isrow;
1069   b->col        = iscol;
1070   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1071   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1072   b->icol       = isicol;
1073   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1074   /* In b structure:  Free imax, ilen, old a, old j.
1075      Allocate dloc, solve_work, new a, new j */
1076   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1077   b->maxnz             = b->nz = ainew[n] ;
1078   (*fact)->factor = FACTOR_LU;
1079   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1080   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1081   (*fact)->info.factor_mallocs    = reallocs;
1082   (*fact)->info.fill_ratio_given  = f;
1083   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #include "src/mat/impls/sbaij/seq/sbaij.h"
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1090 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1091 {
1092   Mat            C = *B;
1093   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1094   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1095   IS             ip=b->row;
1096   PetscErrorCode ierr;
1097   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1098   PetscInt       *ai=a->i,*aj=a->j;
1099   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1100   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1101   PetscReal      zeropivot,shift_amount,rs,shift_nonzero;
1102   PetscTruth     chshift,shift_pd;
1103   PetscInt       nshift=0;
1104 
1105   PetscFunctionBegin;
1106   shift_nonzero  = info->shiftnz;
1107   shift_pd       = info->shiftpd;
1108   zeropivot      = info->zeropivot;
1109 
1110   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1111 
1112   /* initialization */
1113   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1114   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1115   jl   = il + mbs;
1116   rtmp = (MatScalar*)(jl + mbs);
1117 
1118   shift_amount = 0;
1119   do {
1120     chshift = PETSC_FALSE;
1121     for (i=0; i<mbs; i++) {
1122       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1123     }
1124 
1125     for (k = 0; k<mbs; k++){
1126       bval = ba + bi[k];
1127       /* initialize k-th row by the perm[k]-th row of A */
1128       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1129       for (j = jmin; j < jmax; j++){
1130         col = rip[aj[j]];
1131         if (col >= k){ /* only take upper triangular entry */
1132           rtmp[col] = aa[j];
1133           *bval++  = 0.0; /* for in-place factorization */
1134         }
1135       }
1136       /* shift the diagonal of the matrix */
1137       if (nshift) rtmp[k] += shift_amount;
1138 
1139       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1140       dk = rtmp[k];
1141       i = jl[k]; /* first row to be added to k_th row  */
1142 
1143       while (i < k){
1144         nexti = jl[i]; /* next row to be added to k_th row */
1145 
1146         /* compute multiplier, update diag(k) and U(i,k) */
1147         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1148         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1149         dk += uikdi*ba[ili];
1150         ba[ili] = uikdi; /* -U(i,k) */
1151 
1152         /* add multiple of row i to k-th row */
1153         jmin = ili + 1; jmax = bi[i+1];
1154         if (jmin < jmax){
1155           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1156           /* update il and jl for row i */
1157           il[i] = jmin;
1158           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1159         }
1160         i = nexti;
1161       }
1162 
1163       /* shift the diagonals when zero pivot is detected */
1164       /* compute rs=sum of abs(off-diagonal) */
1165       rs   = 0.0;
1166       jmin = bi[k]+1;
1167       nz   = bi[k+1] - jmin;
1168       if (nz){
1169         bcol = bj + jmin;
1170         while (nz--){
1171           rs += PetscAbsScalar(rtmp[*bcol++]);
1172         }
1173       }
1174       if (PetscAbsScalar(dk) <= zeropivot*rs && shift_nonzero){
1175         /* force |diag(*B)| > zeropivot*rs */
1176         if (!nshift){
1177           shift_amount = shift_nonzero;
1178         } else {
1179           shift_amount *= 2.0;
1180         }
1181         chshift = PETSC_TRUE;
1182         nshift++;
1183         break;
1184       } else if (PetscRealPart(dk) <= zeropivot*rs && shift_pd){
1185         /* calculate a shift that would make this row diagonally dominant */
1186 	shift_amount = PetscMax(rs+PetscAbs(PetscRealPart(dk)),1.1*shift_amount);
1187 	chshift      = PETSC_TRUE;
1188 	/* Unlike in the ILU case there is no exit condition on nshift:
1189 	   we increase the shift until it converges. There is no guarantee that
1190 	   this algorithm converges faster or slower, or is better or worse
1191 	   than the ILU algorithm. */
1192 	nshift++;
1193 	break;
1194       } else if (PetscAbsScalar(dk) <= zeropivot*rs){
1195         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1196       }
1197 
1198       /* copy data into U(k,:) */
1199       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1200       jmin = bi[k]+1; jmax = bi[k+1];
1201       if (jmin < jmax) {
1202         for (j=jmin; j<jmax; j++){
1203           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1204         }
1205         /* add the k-th row into il and jl */
1206         il[k] = jmin;
1207         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1208       }
1209     }
1210   } while (chshift);
1211   ierr = PetscFree(il);CHKERRQ(ierr);
1212 
1213   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1214   C->factor       = FACTOR_CHOLESKY;
1215   C->assembled    = PETSC_TRUE;
1216   C->preallocated = PETSC_TRUE;
1217   PetscLogFlops(C->m);
1218   if (nshift){
1219     if (shift_nonzero) {
1220       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount);
1221     } else if (shift_pd) {
1222       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g\n",nshift,shift_amount);
1223     }
1224   }
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 #include "petscbt.h"
1229 #include "src/mat/utils/freespace.h"
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1232 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1233 {
1234   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1235   Mat_SeqSBAIJ   *b;
1236   Mat            B;
1237   PetscErrorCode ierr;
1238   PetscTruth     perm_identity;
1239   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1240   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1241   PetscInt       nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1242   PetscReal      fill=info->fill,levels=info->levels;
1243   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1244   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1245   PetscBT        lnkbt;
1246 
1247   PetscFunctionBegin;
1248   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1249   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1250 
1251   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1252   ui[0] = 0;
1253 
1254   /* special case that simply copies fill pattern */
1255   if (!levels && perm_identity) {
1256     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1257     for (i=0; i<am; i++) {
1258       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1259     }
1260     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1261     cols = uj;
1262     for (i=0; i<am; i++) {
1263       aj    = a->j + a->diag[i];
1264       ncols = ui[i+1] - ui[i];
1265       for (j=0; j<ncols; j++) *cols++ = *aj++;
1266     }
1267   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1268     /* initialization */
1269     ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
1270 
1271     /* jl: linked list for storing indices of the pivot rows
1272        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1273     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1274     il         = jl + am;
1275     uj_ptr     = (PetscInt**)(il + am);
1276     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1277     for (i=0; i<am; i++){
1278       jl[i] = am; il[i] = 0;
1279     }
1280 
1281     /* create and initialize a linked list for storing column indices of the active row k */
1282     nlnk = am + 1;
1283     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1284 
1285     /* initial FreeSpace size is fill*(ai[am]+1) */
1286     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1287     current_space = free_space;
1288     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1289     current_space_lvl = free_space_lvl;
1290 
1291     for (k=0; k<am; k++){  /* for each active row k */
1292       /* initialize lnk by the column indices of row rip[k] of A */
1293       nzk   = 0;
1294       ncols = ai[rip[k]+1] - ai[rip[k]];
1295       ncols_upper = 0;
1296       cols        = cols_lvl + am;
1297       for (j=0; j<ncols; j++){
1298         i = rip[*(aj + ai[rip[k]] + j)];
1299         if (i >= k){ /* only take upper triangular entry */
1300           cols[ncols_upper] = i;
1301           cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
1302           ncols_upper++;
1303         }
1304       }
1305       ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1306       nzk += nlnk;
1307 
1308       /* update lnk by computing fill-in for each pivot row to be merged in */
1309       prow = jl[k]; /* 1st pivot row */
1310 
1311       while (prow < k){
1312         nextprow = jl[prow];
1313 
1314         /* merge prow into k-th row */
1315         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1316         jmax = ui[prow+1];
1317         ncols = jmax-jmin;
1318         i     = jmin - ui[prow];
1319         cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1320         for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1321         ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1322         nzk += nlnk;
1323 
1324         /* update il and jl for prow */
1325         if (jmin < jmax){
1326           il[prow] = jmin;
1327           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1328         }
1329         prow = nextprow;
1330       }
1331 
1332       /* if free space is not available, make more free space */
1333       if (current_space->local_remaining<nzk) {
1334         i = am - k + 1; /* num of unfactored rows */
1335         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1336         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1337         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1338         reallocs++;
1339       }
1340 
1341       /* copy data into free_space and free_space_lvl, then initialize lnk */
1342       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1343 
1344       /* add the k-th row into il and jl */
1345       if (nzk > 1){
1346         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1347         jl[k] = jl[i]; jl[i] = k;
1348         il[k] = ui[k] + 1;
1349       }
1350       uj_ptr[k]     = current_space->array;
1351       uj_lvl_ptr[k] = current_space_lvl->array;
1352 
1353       current_space->array           += nzk;
1354       current_space->local_used      += nzk;
1355       current_space->local_remaining -= nzk;
1356 
1357       current_space_lvl->array           += nzk;
1358       current_space_lvl->local_used      += nzk;
1359       current_space_lvl->local_remaining -= nzk;
1360 
1361       ui[k+1] = ui[k] + nzk;
1362     }
1363 
1364     if (ai[am] != 0) {
1365       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1366       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1367       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1368       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1369     } else {
1370       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1371     }
1372 
1373     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1374     ierr = PetscFree(jl);CHKERRQ(ierr);
1375     ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
1376 
1377     /* destroy list of free space and other temporary array(s) */
1378     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1379     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1380     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1381     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1382 
1383   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1384 
1385   /* put together the new matrix in MATSEQSBAIJ format */
1386   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1387   B = *fact;
1388   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1389   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1390 
1391   b    = (Mat_SeqSBAIJ*)B->data;
1392   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1393   b->singlemalloc = PETSC_FALSE;
1394   /* the next line frees the default space generated by the Create() */
1395   ierr = PetscFree(b->a);CHKERRQ(ierr);
1396   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1397   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1398   b->j    = uj;
1399   b->i    = ui;
1400   b->diag = 0;
1401   b->ilen = 0;
1402   b->imax = 0;
1403   b->row  = perm;
1404   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1405   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1406   b->icol = perm;
1407   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1408   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1409   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1410   b->maxnz = b->nz = ui[am];
1411 
1412   B->factor                 = FACTOR_CHOLESKY;
1413   B->info.factor_mallocs    = reallocs;
1414   B->info.fill_ratio_given  = fill;
1415   if (ai[am] != 0) {
1416     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1417   } else {
1418     B->info.fill_ratio_needed = 0.0;
1419   }
1420   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1421   if (perm_identity){
1422     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1423     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1430 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1431 {
1432   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1433   Mat_SeqSBAIJ   *b;
1434   Mat            B;
1435   PetscErrorCode ierr;
1436   PetscTruth     perm_identity;
1437   PetscReal      fill = info->fill;
1438   PetscInt       *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1439   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1440   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1441   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1442   PetscBT        lnkbt;
1443   IS             iperm;
1444 
1445   PetscFunctionBegin;
1446   /* check whether perm is the identity mapping */
1447   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1448   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1449 
1450   if (!perm_identity){
1451     /* check if perm is symmetric! */
1452     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1453     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1454     for (i=0; i<mbs; i++) {
1455       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1456     }
1457     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1458     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1459   }
1460 
1461   /* initialization */
1462   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1463   ui[0] = 0;
1464 
1465   /* jl: linked list for storing indices of the pivot rows
1466      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
1467   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1468   il     = jl + mbs;
1469   cols   = il + mbs;
1470   ui_ptr = (PetscInt**)(cols + mbs);
1471   for (i=0; i<mbs; i++){
1472     jl[i] = mbs; il[i] = 0;
1473   }
1474 
1475   /* create and initialize a linked list for storing column indices of the active row k */
1476   nlnk = mbs + 1;
1477   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1478 
1479   /* initial FreeSpace size is fill*(ai[mbs]+1) */
1480   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
1481   current_space = free_space;
1482 
1483   for (k=0; k<mbs; k++){  /* for each active row k */
1484     /* initialize lnk by the column indices of row rip[k] of A */
1485     nzk   = 0;
1486     ncols = ai[rip[k]+1] - ai[rip[k]];
1487     ncols_upper = 0;
1488     for (j=0; j<ncols; j++){
1489       i = rip[*(aj + ai[rip[k]] + j)];
1490       if (i >= k){ /* only take upper triangular entry */
1491         cols[ncols_upper] = i;
1492         ncols_upper++;
1493       }
1494     }
1495     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1496     nzk += nlnk;
1497 
1498     /* update lnk by computing fill-in for each pivot row to be merged in */
1499     prow = jl[k]; /* 1st pivot row */
1500 
1501     while (prow < k){
1502       nextprow = jl[prow];
1503       /* merge prow into k-th row */
1504       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
1505       jmax = ui[prow+1];
1506       ncols = jmax-jmin;
1507       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
1508       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1509       nzk += nlnk;
1510 
1511       /* update il and jl for prow */
1512       if (jmin < jmax){
1513         il[prow] = jmin;
1514         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1515       }
1516       prow = nextprow;
1517     }
1518 
1519     /* if free space is not available, make more free space */
1520     if (current_space->local_remaining<nzk) {
1521       i = mbs - k + 1; /* num of unfactored rows */
1522       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1523       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1524       reallocs++;
1525     }
1526 
1527     /* copy data into free space, then initialize lnk */
1528     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1529 
1530     /* add the k-th row into il and jl */
1531     if (nzk-1 > 0){
1532       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1533       jl[k] = jl[i]; jl[i] = k;
1534       il[k] = ui[k] + 1;
1535     }
1536     ui_ptr[k] = current_space->array;
1537     current_space->array           += nzk;
1538     current_space->local_used      += nzk;
1539     current_space->local_remaining -= nzk;
1540 
1541     ui[k+1] = ui[k] + nzk;
1542   }
1543 
1544   if (ai[mbs] != 0) {
1545     PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]);
1546     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1547     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1548     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1549   } else {
1550      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1551   }
1552 
1553   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1554   ierr = PetscFree(jl);CHKERRQ(ierr);
1555 
1556   /* destroy list of free space and other temporary array(s) */
1557   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1558   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1559   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1560 
1561   /* put together the new matrix in MATSEQSBAIJ format */
1562   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
1563   B    = *fact;
1564   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1565   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1566 
1567   b = (Mat_SeqSBAIJ*)B->data;
1568   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1569   b->singlemalloc = PETSC_FALSE;
1570   /* the next line frees the default space generated by the Create() */
1571   ierr = PetscFree(b->a);CHKERRQ(ierr);
1572   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1573   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1574   b->j    = uj;
1575   b->i    = ui;
1576   b->diag = 0;
1577   b->ilen = 0;
1578   b->imax = 0;
1579   b->row  = perm;
1580   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1581   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1582   b->icol = perm;
1583   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1584   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1585   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
1586   b->maxnz = b->nz = ui[mbs];
1587 
1588   B->factor                 = FACTOR_CHOLESKY;
1589   B->info.factor_mallocs    = reallocs;
1590   B->info.fill_ratio_given  = fill;
1591   if (ai[mbs] != 0) {
1592     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
1593   } else {
1594     B->info.fill_ratio_needed = 0.0;
1595   }
1596   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1597   if (perm_identity){
1598     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1599     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1600   }
1601   PetscFunctionReturn(0);
1602 }
1603