xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 3c9e8598273a0ead7f0d51888945a303d2e241a0)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9 {
10   PetscFunctionBegin;
11 
12   SETERRQ(PETSC_ERR_SUP,"Code not written");
13 #if !defined(PETSC_USE_DEBUG)
14   PetscFunctionReturn(0);
15 #endif
16 }
17 
18 
19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
21 
22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
28   /* ------------------------------------------------------------
29 
30           This interface was contribed by Tony Caola
31 
32      This routine is an interface to the pivoting drop-tolerance
33      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34      SPARSEKIT2.
35 
36      The SPARSEKIT2 routines used here are covered by the GNU
37      copyright; see the file gnu in this directory.
38 
39      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40      help in getting this routine ironed out.
41 
42      The major drawback to this routine is that if info->fill is
43      not large enough it fails rather than allocating more space;
44      this can be fixed by hacking/improving the f2c version of
45      Yousef Saad's code.
46 
47      ------------------------------------------------------------
48 */
49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50 {
51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
52   PetscFunctionBegin;
53   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
54   You can obtain the drop tolerance routines by installing PETSc from\n\
55   www.mcs.anl.gov/petsc\n");
56 #else
57   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
58   IS             iscolf,isicol,isirow;
59   PetscTruth     reorder;
60   PetscErrorCode ierr,sierr;
61   PetscInt       *c,*r,*ic,i,n = A->m;
62   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63   PetscInt       *ordcol,*iwk,*iperm,*jw;
64   PetscInt       jmax,lfill,job,*o_i,*o_j;
65   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66   PetscReal      af;
67 
68   PetscFunctionBegin;
69 
70   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
71   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
72   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
73   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
74   lfill   = (PetscInt)(info->dtcount/2.0);
75   jmax    = (PetscInt)(info->fill*a->nz);
76 
77 
78   /* ------------------------------------------------------------
79      If reorder=.TRUE., then the original matrix has to be
80      reordered to reflect the user selected ordering scheme, and
81      then de-reordered so it is in it's original format.
82      Because Saad's dperm() is NOT in place, we have to copy
83      the original matrix and allocate more storage. . .
84      ------------------------------------------------------------
85   */
86 
87   /* set reorder to true if either isrow or iscol is not identity */
88   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
89   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
90   reorder = PetscNot(reorder);
91 
92 
93   /* storage for ilu factor */
94   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
95   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
97   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
98 
99   /* ------------------------------------------------------------
100      Make sure that everything is Fortran formatted (1-Based)
101      ------------------------------------------------------------
102   */
103   for (i=old_i[0];i<old_i[n];i++) {
104     old_j[i]++;
105   }
106   for(i=0;i<n+1;i++) {
107     old_i[i]++;
108   };
109 
110 
111   if (reorder) {
112     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
113     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
114     for(i=0;i<n;i++) {
115       r[i]  = r[i]+1;
116       c[i]  = c[i]+1;
117     }
118     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
119     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
121     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
122     for (i=0;i<n;i++) {
123       r[i]  = r[i]-1;
124       c[i]  = c[i]-1;
125     }
126     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
127     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
128     o_a = old_a2;
129     o_j = old_j2;
130     o_i = old_i2;
131   } else {
132     o_a = old_a;
133     o_j = old_j;
134     o_i = old_i;
135   }
136 
137   /* ------------------------------------------------------------
138      Call Saad's ilutp() routine to generate the factorization
139      ------------------------------------------------------------
140   */
141 
142   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
144   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
145 
146   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
147   if (sierr) {
148     switch (sierr) {
149       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
150       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
152       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
153       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
154       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
155     }
156   }
157 
158   ierr = PetscFree(w);CHKERRQ(ierr);
159   ierr = PetscFree(jw);CHKERRQ(ierr);
160 
161   /* ------------------------------------------------------------
162      Saad's routine gives the result in Modified Sparse Row (msr)
163      Convert to Compressed Sparse Row format (csr)
164      ------------------------------------------------------------
165   */
166 
167   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
168   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
169 
170   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
171 
172   ierr = PetscFree(iwk);CHKERRQ(ierr);
173   ierr = PetscFree(wk);CHKERRQ(ierr);
174 
175   if (reorder) {
176     ierr = PetscFree(old_a2);CHKERRQ(ierr);
177     ierr = PetscFree(old_j2);CHKERRQ(ierr);
178     ierr = PetscFree(old_i2);CHKERRQ(ierr);
179   } else {
180     /* fix permutation of old_j that the factorization introduced */
181     for (i=old_i[0]; i<old_i[n]; i++) {
182       old_j[i-1] = iperm[old_j[i-1]-1];
183     }
184   }
185 
186   /* get rid of the shift to indices starting at 1 */
187   for (i=0; i<n+1; i++) {
188     old_i[i]--;
189   }
190   for (i=old_i[0];i<old_i[n];i++) {
191     old_j[i]--;
192   }
193 
194   /* Make the factored matrix 0-based */
195   for (i=0; i<n+1; i++) {
196     new_i[i]--;
197   }
198   for (i=new_i[0];i<new_i[n];i++) {
199     new_j[i]--;
200   }
201 
202   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203   /*-- permute the right-hand-side and solution vectors           --*/
204   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
205   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
206   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
207   for(i=0; i<n; i++) {
208     ordcol[i] = ic[iperm[i]-1];
209   };
210   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
211   ierr = ISDestroy(isicol);CHKERRQ(ierr);
212 
213   ierr = PetscFree(iperm);CHKERRQ(ierr);
214 
215   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
216   ierr = PetscFree(ordcol);CHKERRQ(ierr);
217 
218   /*----- put together the new matrix -----*/
219 
220   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
221   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
222   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
223   (*fact)->factor    = FACTOR_LU;
224   (*fact)->assembled = PETSC_TRUE;
225 
226   b = (Mat_SeqAIJ*)(*fact)->data;
227   ierr = PetscFree(b->imax);CHKERRQ(ierr);
228   b->sorted        = PETSC_FALSE;
229   b->singlemalloc  = PETSC_FALSE;
230   /* the next line frees the default space generated by the MatCreate() */
231   ierr             = PetscFree(b->a);CHKERRQ(ierr);
232   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
247 
248   /* check out for identical nodes. If found, use inode functions */
249   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
250 
251   af = ((double)b->nz)/((double)a->nz) + .001;
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 /*
262     Factorization code for AIJ format.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267 {
268   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
269   IS             isicol;
270   PetscErrorCode ierr;
271   PetscInt       *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
272   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
274   PetscReal      f;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
284   ainew[0] = 0;
285   /* don't know how many column pointers are needed so estimate */
286   f = info->fill;
287   jmax  = (PetscInt)(f*ai[n]+1);
288   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
289   /* fill is a linked list of nonzeros in active row */
290   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
291   im   = fill + n + 1;
292   /* idnew is location of diagonal in factor */
293   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
294   idnew[0] = 0;
295 
296   for (i=0; i<n; i++) {
297     /* first copy previous fill into linked list */
298     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
299     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300     ajtmp   = aj + ai[r[i]];
301     fill[n] = n;
302     while (nz--) {
303       fm  = n;
304       idx = ic[*ajtmp++];
305       do {
306         m  = fm;
307         fm = fill[m];
308       } while (fm < idx);
309       fill[m]   = idx;
310       fill[idx] = fm;
311     }
312     row = fill[n];
313     while (row < i) {
314       ajtmp = ajnew + idnew[row] + 1;
315       nzbd  = 1 + idnew[row] - ainew[row];
316       nz    = im[row] - nzbd;
317       fm    = row;
318       while (nz-- > 0) {
319         idx = *ajtmp++ ;
320         nzbd++;
321         if (idx == i) im[row] = nzbd;
322         do {
323           m  = fm;
324           fm = fill[m];
325         } while (fm < idx);
326         if (fm != idx) {
327           fill[m]   = idx;
328           fill[idx] = fm;
329           fm        = idx;
330           nnz++;
331         }
332       }
333       row = fill[row];
334     }
335     /* copy new filled row into permanent storage */
336     ainew[i+1] = ainew[i] + nnz;
337     if (ainew[i+1] > jmax) {
338 
339       /* estimate how much additional space we will need */
340       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
341       /* just double the memory each time */
342       PetscInt maxadd = jmax;
343       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
344       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
345       jmax += maxadd;
346 
347       /* allocate a longer ajnew */
348       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
349       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr);
350       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
351       ajnew = ajtmp;
352       reallocs++; /* count how many times we realloc */
353     }
354     ajtmp = ajnew + ainew[i];
355     fm    = fill[n];
356     nzi   = 0;
357     im[i] = nnz;
358     while (nnz--) {
359       if (fm < i) nzi++;
360       *ajtmp++ = fm ;
361       fm       = fill[fm];
362     }
363     idnew[i] = ainew[i] + nzi;
364   }
365   if (ai[n] != 0) {
366     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
367     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
368     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
369     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
370     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
371   } else {
372     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
373   }
374 
375   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
376   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
377 
378   ierr = PetscFree(fill);CHKERRQ(ierr);
379 
380   /* put together the new matrix */
381   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
382   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
383   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
384   PetscLogObjectParent(*B,isicol);
385   b    = (Mat_SeqAIJ*)(*B)->data;
386   ierr = PetscFree(b->imax);CHKERRQ(ierr);
387   b->singlemalloc = PETSC_FALSE;
388   /* the next line frees the default space generated by the Create() */
389   ierr = PetscFree(b->a);CHKERRQ(ierr);
390   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
391   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
392   b->j          = ajnew;
393   b->i          = ainew;
394   b->diag       = idnew;
395   b->ilen       = 0;
396   b->imax       = 0;
397   b->row        = isrow;
398   b->col        = iscol;
399   b->lu_damping        = info->damping;
400   b->lu_zeropivot      = info->zeropivot;
401   b->lu_shift          = info->shift;
402   b->lu_shift_fraction = info->shift_fraction;
403   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
404   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
405   b->icol       = isicol;
406   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
407   /* In b structure:  Free imax, ilen, old a, old j.
408      Allocate idnew, solve_work, new a, new j */
409   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
410   b->maxnz = b->nz = ainew[n] ;
411 
412   (*B)->factor                 =  FACTOR_LU;
413   (*B)->info.factor_mallocs    = reallocs;
414   (*B)->info.fill_ratio_given  = f;
415   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
416   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
417 
418   if (ai[n] != 0) {
419     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
420   } else {
421     (*B)->info.fill_ratio_needed = 0.0;
422   }
423   PetscFunctionReturn(0);
424 }
425 /* ----------------------------------------------------------- */
426 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
430 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
431 {
432   Mat            C = *B;
433   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
434   IS             isrow = b->row,isicol = b->icol;
435   PetscErrorCode ierr;
436   PetscInt       *r,*ic,i,j,n = A->m,*ai = b->i,*aj = b->j;
437   PetscInt       *ajtmpold,*ajtmp,nz,row,*ics;
438   PetscInt       *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0;
439   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
440   PetscReal      damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d;
441   PetscReal      row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.;
442   PetscTruth     damp,lushift;
443 
444   PetscFunctionBegin;
445   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
446   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
447   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
448   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
449   rtmps = rtmp; ics = ic;
450 
451   if (!a->diag) {
452     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
453   }
454 
455   if (b->lu_shift) { /* set max shift */
456     PetscInt *aai = a->i,*ddiag = a->diag;
457     shift_top = 0;
458     for (i=0; i<n; i++) {
459       d = PetscAbsScalar((a->a)[ddiag[i]]);
460       /* calculate amt of shift needed for this row */
461       if (d<=0) {
462         row_shift = 0;
463       } else {
464         row_shift = -2*d;
465       }
466       v = a->a+aai[i];
467       for (j=0; j<aai[i+1]-aai[i]; j++)
468 	row_shift += PetscAbsScalar(v[j]);
469       if (row_shift>shift_top) shift_top = row_shift;
470     }
471   }
472 
473   shift_fraction = 0; shift_amount = 0;
474   do {
475     damp    = PETSC_FALSE;
476     lushift = PETSC_FALSE;
477     for (i=0; i<n; i++) {
478       nz    = ai[i+1] - ai[i];
479       ajtmp = aj + ai[i];
480       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
481 
482       /* load in initial (unfactored row) */
483       nz       = a->i[r[i]+1] - a->i[r[i]];
484       ajtmpold = a->j + a->i[r[i]];
485       v        = a->a + a->i[r[i]];
486       for (j=0; j<nz; j++) {
487         rtmp[ics[ajtmpold[j]]] = v[j];
488       }
489       rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */
490 
491       row = *ajtmp++ ;
492       while  (row < i) {
493         pc = rtmp + row;
494         if (*pc != 0.0) {
495           pv         = b->a + diag_offset[row];
496           pj         = b->j + diag_offset[row] + 1;
497           multiplier = *pc / *pv++;
498           *pc        = multiplier;
499           nz         = ai[row+1] - diag_offset[row] - 1;
500           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
501           PetscLogFlops(2*nz);
502         }
503         row = *ajtmp++;
504       }
505       /* finished row so stick it into b->a */
506       pv   = b->a + ai[i] ;
507       pj   = b->j + ai[i] ;
508       nz   = ai[i+1] - ai[i];
509       diag = diag_offset[i] - ai[i];
510       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
511       rs   = 0.0;
512       for (j=0; j<nz; j++) {
513         pv[j] = rtmps[pj[j]];
514         if (j != diag) rs += PetscAbsScalar(pv[j]);
515       }
516 #define MAX_NSHIFT 5
517       if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) {
518 	if (nshift>MAX_NSHIFT) {
519 	  SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner");
520 	} else if (nshift==MAX_NSHIFT) {
521 	  shift_fraction = shift_hi;
522 	  lushift      = PETSC_FALSE;
523 	} else {
524 	  shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.;
525 	  lushift      = PETSC_TRUE;
526 	}
527 	shift_amount = shift_fraction * shift_top;
528         nshift++;
529         break;
530       }
531       if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) {
532         if (damping) {
533           if (ndamp) damping *= 2.0;
534           damp = PETSC_TRUE;
535           ndamp++;
536           break;
537         } else {
538           SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
539         }
540       }
541     }
542     if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) {
543       /*
544        * if no shift in this attempt & shifting & started shifting & can refine,
545        * then try lower shift
546        */
547       shift_hi       = shift_fraction;
548       shift_fraction = (shift_hi+shift_lo)/2.;
549       shift_amount   = shift_fraction * shift_top;
550       lushift        = PETSC_TRUE;
551       nshift++;
552     }
553   } while (damp || lushift);
554 
555   /* invert diagonal entries for simplier triangular solves */
556   for (i=0; i<n; i++) {
557     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
558   }
559 
560   ierr = PetscFree(rtmp);CHKERRQ(ierr);
561   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
562   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
563   C->factor = FACTOR_LU;
564   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
565   C->assembled = PETSC_TRUE;
566   PetscLogFlops(C->n);
567   if (ndamp) {
568     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping);
569   }
570   if (nshift) {
571     b->lu_shift_fraction = shift_fraction;
572     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift);
573   }
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
579 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
580 {
581   PetscFunctionBegin;
582   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
583   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
584   PetscFunctionReturn(0);
585 }
586 
587 
588 /* ----------------------------------------------------------- */
589 #undef __FUNCT__
590 #define __FUNCT__ "MatLUFactor_SeqAIJ"
591 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
592 {
593   PetscErrorCode ierr;
594   Mat            C;
595 
596   PetscFunctionBegin;
597   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
598   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
599   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
600   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
601   PetscFunctionReturn(0);
602 }
603 /* ----------------------------------------------------------- */
604 #undef __FUNCT__
605 #define __FUNCT__ "MatSolve_SeqAIJ"
606 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
607 {
608   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
609   IS             iscol = a->col,isrow = a->row;
610   PetscErrorCode ierr;
611   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
612   PetscInt       nz,*rout,*cout;
613   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
614 
615   PetscFunctionBegin;
616   if (!n) PetscFunctionReturn(0);
617 
618   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
619   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
620   tmp  = a->solve_work;
621 
622   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
623   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
624 
625   /* forward solve the lower triangular */
626   tmp[0] = b[*r++];
627   tmps   = tmp;
628   for (i=1; i<n; i++) {
629     v   = aa + ai[i] ;
630     vi  = aj + ai[i] ;
631     nz  = a->diag[i] - ai[i];
632     sum = b[*r++];
633     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
634     tmp[i] = sum;
635   }
636 
637   /* backward solve the upper triangular */
638   for (i=n-1; i>=0; i--){
639     v   = aa + a->diag[i] + 1;
640     vi  = aj + a->diag[i] + 1;
641     nz  = ai[i+1] - a->diag[i] - 1;
642     sum = tmp[i];
643     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
644     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
645   }
646 
647   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
648   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
649   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
650   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
651   PetscLogFlops(2*a->nz - A->n);
652   PetscFunctionReturn(0);
653 }
654 
655 /* ----------------------------------------------------------- */
656 #undef __FUNCT__
657 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
658 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
659 {
660   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
661   PetscErrorCode ierr;
662   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
663   PetscScalar    *x,*b,*aa = a->a;
664 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
665   PetscInt       adiag_i,i,*vi,nz,ai_i;
666   PetscScalar    *v,sum;
667 #endif
668 
669   PetscFunctionBegin;
670   if (!n) PetscFunctionReturn(0);
671 
672   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
673   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
674 
675 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
676   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
677 #else
678   /* forward solve the lower triangular */
679   x[0] = b[0];
680   for (i=1; i<n; i++) {
681     ai_i = ai[i];
682     v    = aa + ai_i;
683     vi   = aj + ai_i;
684     nz   = adiag[i] - ai_i;
685     sum  = b[i];
686     while (nz--) sum -= *v++ * x[*vi++];
687     x[i] = sum;
688   }
689 
690   /* backward solve the upper triangular */
691   for (i=n-1; i>=0; i--){
692     adiag_i = adiag[i];
693     v       = aa + adiag_i + 1;
694     vi      = aj + adiag_i + 1;
695     nz      = ai[i+1] - adiag_i - 1;
696     sum     = x[i];
697     while (nz--) sum -= *v++ * x[*vi++];
698     x[i]    = sum*aa[adiag_i];
699   }
700 #endif
701   PetscLogFlops(2*a->nz - A->n);
702   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
703   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
709 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
710 {
711   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
712   IS             iscol = a->col,isrow = a->row;
713   PetscErrorCode ierr;
714   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
715   PetscInt       nz,*rout,*cout;
716   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
717 
718   PetscFunctionBegin;
719   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
720 
721   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
722   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
723   tmp  = a->solve_work;
724 
725   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
726   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
727 
728   /* forward solve the lower triangular */
729   tmp[0] = b[*r++];
730   for (i=1; i<n; i++) {
731     v   = aa + ai[i] ;
732     vi  = aj + ai[i] ;
733     nz  = a->diag[i] - ai[i];
734     sum = b[*r++];
735     while (nz--) sum -= *v++ * tmp[*vi++ ];
736     tmp[i] = sum;
737   }
738 
739   /* backward solve the upper triangular */
740   for (i=n-1; i>=0; i--){
741     v   = aa + a->diag[i] + 1;
742     vi  = aj + a->diag[i] + 1;
743     nz  = ai[i+1] - a->diag[i] - 1;
744     sum = tmp[i];
745     while (nz--) sum -= *v++ * tmp[*vi++ ];
746     tmp[i] = sum*aa[a->diag[i]];
747     x[*c--] += tmp[i];
748   }
749 
750   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
751   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
752   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
753   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
754   PetscLogFlops(2*a->nz);
755 
756   PetscFunctionReturn(0);
757 }
758 /* -------------------------------------------------------------------*/
759 #undef __FUNCT__
760 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
761 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
762 {
763   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
764   IS             iscol = a->col,isrow = a->row;
765   PetscErrorCode ierr;
766   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
767   PetscInt       nz,*rout,*cout,*diag = a->diag;
768   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
769 
770   PetscFunctionBegin;
771   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
772   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
773   tmp  = a->solve_work;
774 
775   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
776   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
777 
778   /* copy the b into temp work space according to permutation */
779   for (i=0; i<n; i++) tmp[i] = b[c[i]];
780 
781   /* forward solve the U^T */
782   for (i=0; i<n; i++) {
783     v   = aa + diag[i] ;
784     vi  = aj + diag[i] + 1;
785     nz  = ai[i+1] - diag[i] - 1;
786     s1  = tmp[i];
787     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
788     while (nz--) {
789       tmp[*vi++ ] -= (*v++)*s1;
790     }
791     tmp[i] = s1;
792   }
793 
794   /* backward solve the L^T */
795   for (i=n-1; i>=0; i--){
796     v   = aa + diag[i] - 1 ;
797     vi  = aj + diag[i] - 1 ;
798     nz  = diag[i] - ai[i];
799     s1  = tmp[i];
800     while (nz--) {
801       tmp[*vi-- ] -= (*v--)*s1;
802     }
803   }
804 
805   /* copy tmp into x according to permutation */
806   for (i=0; i<n; i++) x[r[i]] = tmp[i];
807 
808   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
809   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
810   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
811   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
812 
813   PetscLogFlops(2*a->nz-A->n);
814   PetscFunctionReturn(0);
815 }
816 
817 #undef __FUNCT__
818 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
819 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
820 {
821   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
822   IS             iscol = a->col,isrow = a->row;
823   PetscErrorCode ierr;
824   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
825   PetscInt       nz,*rout,*cout,*diag = a->diag;
826   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
827 
828   PetscFunctionBegin;
829   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
830 
831   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
832   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
833   tmp = a->solve_work;
834 
835   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
836   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
837 
838   /* copy the b into temp work space according to permutation */
839   for (i=0; i<n; i++) tmp[i] = b[c[i]];
840 
841   /* forward solve the U^T */
842   for (i=0; i<n; i++) {
843     v   = aa + diag[i] ;
844     vi  = aj + diag[i] + 1;
845     nz  = ai[i+1] - diag[i] - 1;
846     tmp[i] *= *v++;
847     while (nz--) {
848       tmp[*vi++ ] -= (*v++)*tmp[i];
849     }
850   }
851 
852   /* backward solve the L^T */
853   for (i=n-1; i>=0; i--){
854     v   = aa + diag[i] - 1 ;
855     vi  = aj + diag[i] - 1 ;
856     nz  = diag[i] - ai[i];
857     while (nz--) {
858       tmp[*vi-- ] -= (*v--)*tmp[i];
859     }
860   }
861 
862   /* copy tmp into x according to permutation */
863   for (i=0; i<n; i++) x[r[i]] += tmp[i];
864 
865   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
866   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
867   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
868   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
869 
870   PetscLogFlops(2*a->nz);
871   PetscFunctionReturn(0);
872 }
873 /* ----------------------------------------------------------------*/
874 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
875 
876 #undef __FUNCT__
877 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
878 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
879 {
880   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
881   IS             isicol;
882   PetscErrorCode ierr;
883   PetscInt       *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
884   PetscInt       *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
885   PetscInt       *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
886   PetscInt       incrlev,nnz,i,levels,diagonal_fill;
887   PetscTruth     col_identity,row_identity;
888   PetscReal      f;
889 
890   PetscFunctionBegin;
891   f             = info->fill;
892   levels        = (PetscInt)info->levels;
893   diagonal_fill = (PetscInt)info->diagonal_fill;
894   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
895 
896   /* special case that simply copies fill pattern */
897   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
898   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
899   if (!levels && row_identity && col_identity) {
900     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
901     (*fact)->factor = FACTOR_LU;
902     b               = (Mat_SeqAIJ*)(*fact)->data;
903     if (!b->diag) {
904       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
905     }
906     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
907     b->row              = isrow;
908     b->col              = iscol;
909     b->icol             = isicol;
910     b->lu_damping       = info->damping;
911     b->lu_zeropivot     = info->zeropivot;
912     b->lu_shift         = info->shift;
913     b->lu_shift_fraction= info->shift_fraction;
914     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
915     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
916     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
917     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
918     PetscFunctionReturn(0);
919   }
920 
921   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
922   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
923 
924   /* get new row pointers */
925   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
926   ainew[0] = 0;
927   /* don't know how many column pointers are needed so estimate */
928   jmax = (PetscInt)(f*(ai[n]+1));
929   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
930   /* ajfill is level of fill for each fill entry */
931   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr);
932   /* fill is a linked list of nonzeros in active row */
933   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
934   /* im is level for each filled value */
935   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
936   /* dloc is location of diagonal in factor */
937   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr);
938   dloc[0]  = 0;
939   for (prow=0; prow<n; prow++) {
940 
941     /* copy current row into linked list */
942     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
943     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
944     xi      = aj + ai[r[prow]] ;
945     fill[n]    = n;
946     fill[prow] = -1; /* marker to indicate if diagonal exists */
947     while (nz--) {
948       fm  = n;
949       idx = ic[*xi++ ];
950       do {
951         m  = fm;
952         fm = fill[m];
953       } while (fm < idx);
954       fill[m]   = idx;
955       fill[idx] = fm;
956       im[idx]   = 0;
957     }
958 
959     /* make sure diagonal entry is included */
960     if (diagonal_fill && fill[prow] == -1) {
961       fm = n;
962       while (fill[fm] < prow) fm = fill[fm];
963       fill[prow] = fill[fm]; /* insert diagonal into linked list */
964       fill[fm]   = prow;
965       im[prow]   = 0;
966       nzf++;
967       dcount++;
968     }
969 
970     nzi = 0;
971     row = fill[n];
972     while (row < prow) {
973       incrlev = im[row] + 1;
974       nz      = dloc[row];
975       xi      = ajnew  + ainew[row]  + nz + 1;
976       flev    = ajfill + ainew[row]  + nz + 1;
977       nnz     = ainew[row+1] - ainew[row] - nz - 1;
978       fm      = row;
979       while (nnz-- > 0) {
980         idx = *xi++ ;
981         if (*flev + incrlev > levels) {
982           flev++;
983           continue;
984         }
985         do {
986           m  = fm;
987           fm = fill[m];
988         } while (fm < idx);
989         if (fm != idx) {
990           im[idx]   = *flev + incrlev;
991           fill[m]   = idx;
992           fill[idx] = fm;
993           fm        = idx;
994           nzf++;
995         } else {
996           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
997         }
998         flev++;
999       }
1000       row = fill[row];
1001       nzi++;
1002     }
1003     /* copy new filled row into permanent storage */
1004     ainew[prow+1] = ainew[prow] + nzf;
1005     if (ainew[prow+1] > jmax) {
1006 
1007       /* estimate how much additional space we will need */
1008       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1009       /* just double the memory each time */
1010       /*  maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1011       PetscInt maxadd = jmax;
1012       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1013       jmax += maxadd;
1014 
1015       /* allocate a longer ajnew and ajfill */
1016       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1017       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1018       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1019       ajnew  = xi;
1020       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1021       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1022       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1023       ajfill = xi;
1024       reallocs++; /* count how many times we realloc */
1025     }
1026     xi          = ajnew + ainew[prow] ;
1027     flev        = ajfill + ainew[prow] ;
1028     dloc[prow]  = nzi;
1029     fm          = fill[n];
1030     while (nzf--) {
1031       *xi++   = fm ;
1032       *flev++ = im[fm];
1033       fm      = fill[fm];
1034     }
1035     /* make sure row has diagonal entry */
1036     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1037       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1038     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1039     }
1040   }
1041   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1042   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1043   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1044   ierr = PetscFree(fill);CHKERRQ(ierr);
1045   ierr = PetscFree(im);CHKERRQ(ierr);
1046 
1047   {
1048     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1049     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1050     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1051     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1052     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1053     if (diagonal_fill) {
1054       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1055     }
1056   }
1057 
1058   /* put together the new matrix */
1059   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1060   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1061   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1062   PetscLogObjectParent(*fact,isicol);
1063   b = (Mat_SeqAIJ*)(*fact)->data;
1064   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1065   b->singlemalloc = PETSC_FALSE;
1066   len = (ainew[n] )*sizeof(PetscScalar);
1067   /* the next line frees the default space generated by the Create() */
1068   ierr = PetscFree(b->a);CHKERRQ(ierr);
1069   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1070   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1071   b->j          = ajnew;
1072   b->i          = ainew;
1073   for (i=0; i<n; i++) dloc[i] += ainew[i];
1074   b->diag       = dloc;
1075   b->ilen       = 0;
1076   b->imax       = 0;
1077   b->row        = isrow;
1078   b->col        = iscol;
1079   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1080   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1081   b->icol       = isicol;
1082   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1083   /* In b structure:  Free imax, ilen, old a, old j.
1084      Allocate dloc, solve_work, new a, new j */
1085   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1086   b->maxnz             = b->nz = ainew[n] ;
1087   b->lu_damping        = info->damping;
1088   b->lu_shift          = info->shift;
1089   b->lu_shift_fraction = info->shift_fraction;
1090   b->lu_zeropivot = info->zeropivot;
1091   (*fact)->factor = FACTOR_LU;
1092   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1093   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1094 
1095   (*fact)->info.factor_mallocs    = reallocs;
1096   (*fact)->info.fill_ratio_given  = f;
1097   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #include "src/mat/impls/sbaij/seq/sbaij.h"
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1104 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact)
1105 {
1106   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1107   PetscErrorCode ierr,(*f)(Mat,Mat*);
1108 
1109   PetscFunctionBegin;
1110   printf(" MatCholeskyFactorNumeric_SeqAIJ ...\n");
1111   if (!a->sbaijMat){
1112     ierr = MatConvert_SeqAIJ_SeqSBAIJ(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
1113   }
1114   ierr = PetscObjectQueryFunction((PetscObject) *fact,"MatCholeskyFactorNumeric",(void (**)(void))&f);CHKERRQ(ierr);
1115   ierr = (*f)(a->sbaijMat,fact);CHKERRQ(ierr);
1116   ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);
1117   a->sbaijMat = PETSC_NULL;
1118   PetscFunctionReturn(0);
1119 }
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering"
1122 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering(Mat A,Mat *fact)
1123 {
1124   Mat            C = *fact;
1125   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1126   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1127   PetscErrorCode ierr;
1128   PetscInt       i,j,am = A->m;
1129   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1130   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
1131   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1132   PetscReal      damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
1133   PetscTruth     damp,chshift;
1134   PetscInt       nshift=0;
1135 
1136   PetscFunctionBegin;
1137   /* initialization */
1138   /* il and jl record the first nonzero element in each row of the accessing
1139      window U(0:k, k:am-1).
1140      jl:    list of rows to be added to uneliminated rows
1141             i>= k: jl(i) is the first row to be added to row i
1142             i<  k: jl(i) is the row following row i in some list of rows
1143             jl(i) = am indicates the end of a list
1144      il(i): points to the first nonzero element in U(i,k:am-1)
1145   */
1146   nz   = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar);
1147   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1148   jl   = il + am;
1149   rtmp = (MatScalar*)(jl + am);
1150 
1151   shift_amount = 0;
1152   do {
1153     damp = PETSC_FALSE;
1154     chshift = PETSC_FALSE;
1155     for (i=0; i<am; i++) {
1156       rtmp[i] = 0.0; jl[i] = am; il[0] = 0;
1157     }
1158 
1159     for (k = 0; k<am; k++){
1160     /*initialize k-th row with elements nonzero in row perm(k) of A */
1161       nz   = ai[k+1] - ai[k];
1162       acol = aj + ai[k];
1163       aval = aa + ai[k];
1164       bval = ba + bi[k];
1165       while (nz -- ){
1166         if (*acol < k) { /* skip lower triangular entries */
1167           acol++; aval++;
1168         } else {
1169           rtmp[*acol++] = *aval++;
1170           *bval++       = 0.0; /* for in-place factorization */
1171         }
1172       }
1173       /* damp the diagonal of the matrix */
1174       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
1175 
1176       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1177       dk = rtmp[k];
1178       i  = jl[k]; /* first row to be added to k_th row  */
1179 
1180       while (i < k){
1181         nexti = jl[i]; /* next row to be added to k_th row */
1182         /* compute multiplier, update D(k) and U(i,k) */
1183         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1184         uikdi = - ba[ili]*ba[bi[i]];
1185         dk   += uikdi*ba[ili];
1186         ba[ili] = uikdi; /* -U(i,k) */
1187 
1188         /* add multiple of row i to k-th row ... */
1189         jmin = ili + 1;
1190         nz   = bi[i+1] - jmin;
1191         if (nz > 0){
1192           bcol = bj + jmin;
1193           bval = ba + jmin;
1194           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1195           /* update il and jl for i-th row */
1196           il[i] = jmin;
1197           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1198         }
1199         i = nexti;
1200       }
1201 
1202       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
1203 	/* calculate a shift that would make this row diagonally dominant */
1204 	PetscReal rs = PetscAbs(PetscRealPart(dk));
1205 	jmin      = bi[k]+1;
1206 	nz        = bi[k+1] - jmin;
1207 	if (nz){
1208 	  bcol = bj + jmin;
1209 	  bval = ba + jmin;
1210 	  while (nz--){
1211 	    rs += PetscAbsScalar(rtmp[*bcol++]);
1212 	  }
1213 	}
1214 	/* if this shift is less than the previous, just up the previous
1215 	   one by a bit */
1216 	shift_amount = PetscMax(rs,1.1*shift_amount);
1217 	chshift  = PETSC_TRUE;
1218 	/* Unlike in the ILU case there is no exit condition on nshift:
1219 	   we increase the shift until it converges. There is no guarantee that
1220 	   this algorithm converges faster or slower, or is better or worse
1221 	   than the ILU algorithm. */
1222 	nshift++;
1223 	break;
1224       }
1225       if (PetscRealPart(dk) < zeropivot){
1226         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
1227         if (damping > 0.0) {
1228           if (ndamp) damping *= 2.0;
1229           damp = PETSC_TRUE;
1230           ndamp++;
1231           break;
1232         } else if (PetscAbsScalar(dk) < zeropivot){
1233           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
1234         } else {
1235           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
1236         }
1237       }
1238 
1239       /* copy data into U(k,:) */
1240       ba[bi[k]] = 1.0/dk;
1241       jmin      = bi[k]+1;
1242       nz        = bi[k+1] - jmin;
1243       if (nz){
1244         bcol = bj + jmin;
1245         bval = ba + jmin;
1246         while (nz--){
1247           *bval++       = rtmp[*bcol];
1248           rtmp[*bcol++] = 0.0;
1249         }
1250         /* add k-th row into il and jl */
1251         il[k] = jmin;
1252         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1253       }
1254     }
1255   } while (damp||chshift);
1256   ierr = PetscFree(il);CHKERRQ(ierr);
1257 
1258   C->factor       = FACTOR_CHOLESKY;
1259   C->assembled    = PETSC_TRUE;
1260   C->preallocated = PETSC_TRUE;
1261   PetscLogFlops(C->m);
1262   if (ndamp) {
1263     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqAIJ_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping);
1264   }
1265   if (nshift) {
1266     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering diagonal shifted %D shifts\n",nshift);
1267   }
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #include "petscbt.h"
1272 #include "src/mat/utils/freespace.h"
1273 #undef __FUNCT__
1274 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1275 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1276 {
1277   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1278   Mat_SeqSBAIJ   *b;
1279   Mat            B;
1280   PetscErrorCode ierr;
1281   PetscTruth     perm_identity;
1282   PetscInt       reallocs=0,*rip,i,*ai,*aj,am=A->m,*ui;
1283   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1284   PetscInt       nlnk,*lnk,*lnk_lvl,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1285   PetscReal      fill=info->fill,levels=info->levels;
1286   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1287   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1288   PetscBT        lnkbt;
1289 
1290   PetscFunctionBegin;
1291   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1292   if (!perm_identity){
1293     SETERRQ(PETSC_ERR_SUP,"Non-identity permutation is not supported yet");
1294   }
1295   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1296   ai = a->i; aj = a->j;
1297   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1298 
1299   /* levels = 0: simply copies fill pattern */
1300   if (!levels) {
1301     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1302     for (i=0; i<am; i++) {
1303       ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */
1304     }
1305     ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1306     B = *fact;
1307     ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1308     ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr);
1309 
1310     b  = (Mat_SeqSBAIJ*)B->data;
1311     uj = b->j;
1312     for (i=0; i<am; i++) {
1313       aj = a->j + a->diag[i];
1314       for (j=0; j<ui[i]; j++){
1315         *uj++ = *aj++;
1316       }
1317       b->ilen[i] = ui[i];
1318     }
1319     ierr = PetscFree(ui);CHKERRQ(ierr);
1320     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1321     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1322 
1323     B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1324     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
1325     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering;
1326     PetscFunctionReturn(0);
1327   }
1328 
1329   /* initialization */
1330   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1331   ui[0] = 0;
1332   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
1333 
1334   /* jl: linked list for storing indices of the pivot rows
1335      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1336   ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1337   il         = jl + am;
1338   uj_ptr     = (PetscInt**)(il + am);
1339   uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1340   for (i=0; i<am; i++){
1341     jl[i] = am; il[i] = 0;
1342   }
1343 
1344   /* create and initialize a linked list for storing column indices of the active row k */
1345   nlnk = am + 1;
1346   ierr = PetscLLCreate_PermutedLeveled(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1347 
1348   /* initial FreeSpace size is fill*(ai[am]+1) */
1349   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1350   current_space = free_space;
1351   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1352   current_space_lvl = free_space_lvl;
1353 
1354   for (k=0; k<am; k++){  /* for each active row k */
1355     /* initialize lnk by the column indices of row rip[k] of A */
1356     nzk   = 0;
1357     ncols = ai[rip[k]+1] - a->diag[rip[k]]; /* changes for !perm_identity */
1358     cols  = aj + a->diag[rip[k]];
1359     for (j=0; j<ncols; j++) cols_lvl[j] = -1;  /* initialize level for nonzero entries */
1360     ierr = PetscLLAdd_PermutedLeveled(ncols,cols,levels,cols_lvl,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1361     nzk += nlnk;
1362 
1363     /* update lnk by computing fill-in for each pivot row to be merged in */
1364     prow = jl[k]; /* 1st pivot row */
1365 
1366     while (prow < k){
1367       nextprow = jl[prow];
1368 
1369       /* merge prow into k-th row */
1370       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1371       jmax = ui[prow+1];
1372       ncols = jmax-jmin;
1373       i     = jmin - ui[prow];
1374       cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1375       for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1376       ierr = PetscLLAdd_PermutedLeveled(ncols,cols,levels,cols_lvl,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1377       nzk += nlnk;
1378 
1379       /* update il and jl for prow */
1380       if (jmin < jmax){
1381         il[prow] = jmin;
1382         j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1383       }
1384       prow = nextprow;
1385     }
1386 
1387     /* if free space is not available, make more free space */
1388     if (current_space->local_remaining<nzk) {
1389       i = am - k + 1; /* num of unfactored rows */
1390       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1391       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1392       ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1393       reallocs++;
1394     }
1395 
1396     /* copy data into free_space and free_space_lvl, then initialize lnk */
1397     ierr = PetscLLClean_PermutedLeveled(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1398 
1399     /* add the k-th row into il and jl */
1400     if (nzk-1 > 0){
1401       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1402       jl[k] = jl[i]; jl[i] = k;
1403       il[k] = ui[k] + 1;
1404     }
1405     uj_ptr[k]     = current_space->array;
1406     uj_lvl_ptr[k] = current_space_lvl->array;
1407 
1408     current_space->array           += nzk;
1409     current_space->local_used      += nzk;
1410     current_space->local_remaining -= nzk;
1411 
1412     current_space_lvl->array           += nzk;
1413     current_space_lvl->local_used      += nzk;
1414     current_space_lvl->local_remaining -= nzk;
1415 
1416     ui[k+1] = ui[k] + nzk;
1417   }
1418 
1419   if (ai[am] != 0) {
1420     PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1421     PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1422     PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1423     PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1424   } else {
1425      PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1426   }
1427 
1428   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1429   ierr = PetscFree(jl);CHKERRQ(ierr);
1430   ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
1431 
1432   /* destroy list of free space and other temporary array(s) */
1433   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1434   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1435   ierr = PetscLLDestroy_PermutedLeveled(lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1436   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1437 
1438   /* put together the new matrix in MATSEQSBAIJ format */
1439   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1440   B = *fact;
1441   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1442   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1443 
1444   b = (Mat_SeqSBAIJ*)B->data;
1445   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1446   b->singlemalloc = PETSC_FALSE;
1447   /* the next line frees the default space generated by the Create() */
1448   ierr = PetscFree(b->a);CHKERRQ(ierr);
1449   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1450   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1451   b->j    = uj;
1452   b->i    = ui;
1453   b->diag = 0;
1454   b->ilen = 0;
1455   b->imax = 0;
1456   b->row  = perm;
1457   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1458   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1459   b->icol = perm;
1460   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1461   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1462   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1463   b->maxnz = b->nz = ui[am];
1464 
1465   B->factor                 = FACTOR_CHOLESKY;
1466   B->info.factor_mallocs    = reallocs;
1467   B->info.fill_ratio_given  = fill;
1468   if (ai[am] != 0) {
1469     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1470   } else {
1471     B->info.fill_ratio_needed = 0.0;
1472   }
1473 
1474   B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1475   B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1476   ierr = PetscObjectComposeFunction((PetscObject) B,"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
1477   B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering;
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1483 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1484 {
1485   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1486   Mat_SeqSBAIJ   *b;
1487   PetscErrorCode ierr;
1488   PetscTruth     perm_identity;
1489   PetscReal      fill = info->fill;
1490   PetscInt       *rip,i,am=A->m,*ai,*aj,reallocs=0,prow;
1491   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1492   PetscInt       nlnk,*lnk,ncols,*cols,*uj,**uj_ptr;
1493   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1494   PetscBT        lnkbt;
1495 
1496   PetscFunctionBegin;
1497   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1498   if (!perm_identity){
1499     SETERRQ(PETSC_ERR_SUP,"Non-identity permutation is not supported yet");
1500   }
1501 
1502   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1503   ai = a->i; aj = a->j;
1504   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1505 
1506   /* initialization */
1507   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1508   ui[0] = 0;
1509 
1510   /* jl: linked list for storing indices of the pivot rows
1511      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1512   ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1513   il     = jl + am;
1514   uj_ptr = (PetscInt**)(il + am);
1515   for (i=0; i<am; i++){
1516     jl[i] = am; il[i] = 0;
1517   }
1518 
1519   /* create and initialize a linked list for storing column indices of the active row k */
1520   nlnk = am + 1;
1521   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1522 
1523   /* initial FreeSpace size is fill*(ai[am]+1) */
1524   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1525   current_space = free_space;
1526 
1527   for (k=0; k<am; k++){  /* for each active row k */
1528     /* initialize lnk by the column indices of row rip[k] of A */
1529     nzk     = 0;
1530     ncols    = ai[rip[k]+1] - a->diag[rip[k]]; /* diag would change for !perm_identity */
1531     cols = aj + a->diag[rip[k]];
1532     ierr = PetscLLAdd(ncols,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1533     nzk += nlnk;
1534 
1535     /* update lnk by computing fill-in for each pivot row to be merged in */
1536     prow = jl[k]; /* 1st pivot row */
1537 
1538     while (prow < k){
1539       nextprow = jl[prow];
1540 
1541       /* merge prow into k-th row */
1542       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1543       jmax = ui[prow+1];
1544       ncols = jmax-jmin;
1545       cols = uj_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1546       ierr = PetscLLAdd(ncols,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1547       nzk += nlnk;
1548 
1549       /* update il and jl for prow */
1550       if (jmin < jmax){
1551         il[prow] = jmin;
1552         j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1553       }
1554       prow = nextprow;
1555     }
1556 
1557     /* if free space is not available, make more free space */
1558     if (current_space->local_remaining<nzk) {
1559       i = am - k + 1; /* num of unfactored rows */
1560       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1561       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1562       reallocs++;
1563     }
1564 
1565     /* copy data into free space, then initialize lnk */
1566     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1567 
1568     /* add the k-th row into il and jl */
1569     if (nzk-1 > 0){
1570       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1571       jl[k] = jl[i]; jl[i] = k;
1572       il[k] = ui[k] + 1;
1573     }
1574     uj_ptr[k] = current_space->array;
1575     current_space->array           += nzk;
1576     current_space->local_used      += nzk;
1577     current_space->local_remaining -= nzk;
1578 
1579     ui[k+1] = ui[k] + nzk;
1580   }
1581 
1582   if (ai[am] != 0) {
1583     PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1584     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1585     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1586     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1587   } else {
1588      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1589   }
1590 
1591   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1592   ierr = PetscFree(jl);CHKERRQ(ierr);
1593 
1594   /* destroy list of free space and other temporary array(s) */
1595   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1596   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1597   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1598 
1599   /* put together the new matrix in MATSEQSBAIJ format */
1600   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1601   ierr = MatSetType(*fact,MATSEQSBAIJ);CHKERRQ(ierr);
1602   ierr = MatSeqSBAIJSetPreallocation(*fact,1,0,PETSC_NULL);CHKERRQ(ierr);
1603 
1604   b = (Mat_SeqSBAIJ*)(*fact)->data;
1605   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1606   b->singlemalloc = PETSC_FALSE;
1607   /* the next line frees the default space generated by the Create() */
1608   ierr = PetscFree(b->a);CHKERRQ(ierr);
1609   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1610   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1611   b->j    = uj;
1612   b->i    = ui;
1613   b->diag = 0;
1614   b->ilen = 0;
1615   b->imax = 0;
1616   b->row  = perm;
1617   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1618   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1619   b->icol = perm;
1620   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1621   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1622   /* In b structure:  Free imax, ilen, old a, old j.
1623      Allocate idnew, solve_work, new a, new j */
1624   PetscLogObjectMemory(*fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1625   b->maxnz = b->nz = ui[am];
1626 
1627   (*fact)->factor                 = FACTOR_CHOLESKY;
1628   (*fact)->info.factor_mallocs    = reallocs;
1629   (*fact)->info.fill_ratio_given  = fill;
1630   if (ai[am] != 0) {
1631     (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1632   } else {
1633     (*fact)->info.fill_ratio_needed = 0.0;
1634   }
1635 
1636   (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1637   (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1638   ierr = PetscObjectComposeFunction((PetscObject) *fact,"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr);
1639   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering;
1640   PetscFunctionReturn(0);
1641 }
1642