xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision d4dbac3cd6da13dc60b5d653f928051cfef0d71e)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9 {
10   PetscFunctionBegin;
11 
12   SETERRQ(PETSC_ERR_SUP,"Code not written");
13 #if !defined(PETSC_USE_DEBUG)
14   PetscFunctionReturn(0);
15 #endif
16 }
17 
18 
19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
21 
22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
28   /* ------------------------------------------------------------
29 
30           This interface was contribed by Tony Caola
31 
32      This routine is an interface to the pivoting drop-tolerance
33      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34      SPARSEKIT2.
35 
36      The SPARSEKIT2 routines used here are covered by the GNU
37      copyright; see the file gnu in this directory.
38 
39      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40      help in getting this routine ironed out.
41 
42      The major drawback to this routine is that if info->fill is
43      not large enough it fails rather than allocating more space;
44      this can be fixed by hacking/improving the f2c version of
45      Yousef Saad's code.
46 
47      ------------------------------------------------------------
48 */
49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50 {
51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
52   PetscFunctionBegin;
53   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
54   You can obtain the drop tolerance routines by installing PETSc from\n\
55   www.mcs.anl.gov/petsc\n");
56 #else
57   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
58   IS             iscolf,isicol,isirow;
59   PetscTruth     reorder;
60   PetscErrorCode ierr,sierr;
61   PetscInt       *c,*r,*ic,i,n = A->m;
62   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63   PetscInt       *ordcol,*iwk,*iperm,*jw;
64   PetscInt       jmax,lfill,job,*o_i,*o_j;
65   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66   PetscReal      af;
67 
68   PetscFunctionBegin;
69 
70   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
71   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
72   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
73   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
74   lfill   = (PetscInt)(info->dtcount/2.0);
75   jmax    = (PetscInt)(info->fill*a->nz);
76 
77 
78   /* ------------------------------------------------------------
79      If reorder=.TRUE., then the original matrix has to be
80      reordered to reflect the user selected ordering scheme, and
81      then de-reordered so it is in it's original format.
82      Because Saad's dperm() is NOT in place, we have to copy
83      the original matrix and allocate more storage. . .
84      ------------------------------------------------------------
85   */
86 
87   /* set reorder to true if either isrow or iscol is not identity */
88   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
89   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
90   reorder = PetscNot(reorder);
91 
92 
93   /* storage for ilu factor */
94   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
95   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
97   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
98 
99   /* ------------------------------------------------------------
100      Make sure that everything is Fortran formatted (1-Based)
101      ------------------------------------------------------------
102   */
103   for (i=old_i[0];i<old_i[n];i++) {
104     old_j[i]++;
105   }
106   for(i=0;i<n+1;i++) {
107     old_i[i]++;
108   };
109 
110 
111   if (reorder) {
112     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
113     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
114     for(i=0;i<n;i++) {
115       r[i]  = r[i]+1;
116       c[i]  = c[i]+1;
117     }
118     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
119     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
121     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
122     for (i=0;i<n;i++) {
123       r[i]  = r[i]-1;
124       c[i]  = c[i]-1;
125     }
126     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
127     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
128     o_a = old_a2;
129     o_j = old_j2;
130     o_i = old_i2;
131   } else {
132     o_a = old_a;
133     o_j = old_j;
134     o_i = old_i;
135   }
136 
137   /* ------------------------------------------------------------
138      Call Saad's ilutp() routine to generate the factorization
139      ------------------------------------------------------------
140   */
141 
142   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
144   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
145 
146   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
147   if (sierr) {
148     switch (sierr) {
149       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
150       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
152       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
153       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
154       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
155     }
156   }
157 
158   ierr = PetscFree(w);CHKERRQ(ierr);
159   ierr = PetscFree(jw);CHKERRQ(ierr);
160 
161   /* ------------------------------------------------------------
162      Saad's routine gives the result in Modified Sparse Row (msr)
163      Convert to Compressed Sparse Row format (csr)
164      ------------------------------------------------------------
165   */
166 
167   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
168   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
169 
170   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
171 
172   ierr = PetscFree(iwk);CHKERRQ(ierr);
173   ierr = PetscFree(wk);CHKERRQ(ierr);
174 
175   if (reorder) {
176     ierr = PetscFree(old_a2);CHKERRQ(ierr);
177     ierr = PetscFree(old_j2);CHKERRQ(ierr);
178     ierr = PetscFree(old_i2);CHKERRQ(ierr);
179   } else {
180     /* fix permutation of old_j that the factorization introduced */
181     for (i=old_i[0]; i<old_i[n]; i++) {
182       old_j[i-1] = iperm[old_j[i-1]-1];
183     }
184   }
185 
186   /* get rid of the shift to indices starting at 1 */
187   for (i=0; i<n+1; i++) {
188     old_i[i]--;
189   }
190   for (i=old_i[0];i<old_i[n];i++) {
191     old_j[i]--;
192   }
193 
194   /* Make the factored matrix 0-based */
195   for (i=0; i<n+1; i++) {
196     new_i[i]--;
197   }
198   for (i=new_i[0];i<new_i[n];i++) {
199     new_j[i]--;
200   }
201 
202   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203   /*-- permute the right-hand-side and solution vectors           --*/
204   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
205   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
206   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
207   for(i=0; i<n; i++) {
208     ordcol[i] = ic[iperm[i]-1];
209   };
210   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
211   ierr = ISDestroy(isicol);CHKERRQ(ierr);
212 
213   ierr = PetscFree(iperm);CHKERRQ(ierr);
214 
215   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
216   ierr = PetscFree(ordcol);CHKERRQ(ierr);
217 
218   /*----- put together the new matrix -----*/
219 
220   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
221   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
222   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
223   (*fact)->factor    = FACTOR_LU;
224   (*fact)->assembled = PETSC_TRUE;
225 
226   b = (Mat_SeqAIJ*)(*fact)->data;
227   ierr = PetscFree(b->imax);CHKERRQ(ierr);
228   b->sorted        = PETSC_FALSE;
229   b->singlemalloc  = PETSC_FALSE;
230   /* the next line frees the default space generated by the MatCreate() */
231   ierr             = PetscFree(b->a);CHKERRQ(ierr);
232   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
247 
248   /* check out for identical nodes. If found, use inode functions */
249   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
250 
251   af = ((double)b->nz)/((double)a->nz) + .001;
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 /*
262     Factorization code for AIJ format.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267 {
268   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
269   IS             isicol;
270   PetscErrorCode ierr;
271   PetscInt       *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
272   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
274   PetscReal      f;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
284   ainew[0] = 0;
285   /* don't know how many column pointers are needed so estimate */
286   f = info->fill;
287   jmax  = (PetscInt)(f*ai[n]+1);
288   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
289   /* fill is a linked list of nonzeros in active row */
290   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
291   im   = fill + n + 1;
292   /* idnew is location of diagonal in factor */
293   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
294   idnew[0] = 0;
295 
296   for (i=0; i<n; i++) {
297     /* first copy previous fill into linked list */
298     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
299     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300     ajtmp   = aj + ai[r[i]];
301     fill[n] = n;
302     while (nz--) {
303       fm  = n;
304       idx = ic[*ajtmp++];
305       do {
306         m  = fm;
307         fm = fill[m];
308       } while (fm < idx);
309       fill[m]   = idx;
310       fill[idx] = fm;
311     }
312     row = fill[n];
313     while (row < i) {
314       ajtmp = ajnew + idnew[row] + 1;
315       nzbd  = 1 + idnew[row] - ainew[row];
316       nz    = im[row] - nzbd;
317       fm    = row;
318       while (nz-- > 0) {
319         idx = *ajtmp++ ;
320         nzbd++;
321         if (idx == i) im[row] = nzbd;
322         do {
323           m  = fm;
324           fm = fill[m];
325         } while (fm < idx);
326         if (fm != idx) {
327           fill[m]   = idx;
328           fill[idx] = fm;
329           fm        = idx;
330           nnz++;
331         }
332       }
333       row = fill[row];
334     }
335     /* copy new filled row into permanent storage */
336     ainew[i+1] = ainew[i] + nnz;
337     if (ainew[i+1] > jmax) {
338 
339       /* estimate how much additional space we will need */
340       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
341       /* just double the memory each time */
342       PetscInt maxadd = jmax;
343       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
344       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
345       jmax += maxadd;
346 
347       /* allocate a longer ajnew */
348       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
349       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr);
350       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
351       ajnew = ajtmp;
352       reallocs++; /* count how many times we realloc */
353     }
354     ajtmp = ajnew + ainew[i];
355     fm    = fill[n];
356     nzi   = 0;
357     im[i] = nnz;
358     while (nnz--) {
359       if (fm < i) nzi++;
360       *ajtmp++ = fm ;
361       fm       = fill[fm];
362     }
363     idnew[i] = ainew[i] + nzi;
364   }
365   if (ai[n] != 0) {
366     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
367     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
368     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
369     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
370     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
371   } else {
372     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
373   }
374 
375   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
376   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
377 
378   ierr = PetscFree(fill);CHKERRQ(ierr);
379 
380   /* put together the new matrix */
381   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
382   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
383   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
384   PetscLogObjectParent(*B,isicol);
385   b    = (Mat_SeqAIJ*)(*B)->data;
386   ierr = PetscFree(b->imax);CHKERRQ(ierr);
387   b->singlemalloc = PETSC_FALSE;
388   /* the next line frees the default space generated by the Create() */
389   ierr = PetscFree(b->a);CHKERRQ(ierr);
390   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
391   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
392   b->j          = ajnew;
393   b->i          = ainew;
394   b->diag       = idnew;
395   b->ilen       = 0;
396   b->imax       = 0;
397   b->row        = isrow;
398   b->col        = iscol;
399   b->lu_damping        = info->damping;
400   b->lu_zeropivot      = info->zeropivot;
401   b->lu_shift          = info->shift;
402   b->lu_shift_fraction = info->shift_fraction;
403   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
404   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
405   b->icol       = isicol;
406   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
407   /* In b structure:  Free imax, ilen, old a, old j.
408      Allocate idnew, solve_work, new a, new j */
409   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
410   b->maxnz = b->nz = ainew[n] ;
411 
412   (*B)->factor                 =  FACTOR_LU;
413   (*B)->info.factor_mallocs    = reallocs;
414   (*B)->info.fill_ratio_given  = f;
415   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
416   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
417 
418   if (ai[n] != 0) {
419     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
420   } else {
421     (*B)->info.fill_ratio_needed = 0.0;
422   }
423   PetscFunctionReturn(0);
424 }
425 /* ----------------------------------------------------------- */
426 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
430 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
431 {
432   Mat            C = *B;
433   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
434   IS             isrow = b->row,isicol = b->icol;
435   PetscErrorCode ierr;
436   PetscInt       *r,*ic,i,j,n = A->m,*ai = b->i,*aj = b->j;
437   PetscInt       *ajtmpold,*ajtmp,nz,row,*ics;
438   PetscInt       *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0;
439   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
440   PetscReal      damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d;
441   PetscReal      row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.;
442   PetscTruth     damp,lushift;
443 
444   PetscFunctionBegin;
445   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
446   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
447   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
448   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
449   rtmps = rtmp; ics = ic;
450 
451   if (!a->diag) {
452     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
453   }
454 
455   if (b->lu_shift) { /* set max shift */
456     PetscInt *aai = a->i,*ddiag = a->diag;
457     shift_top = 0;
458     for (i=0; i<n; i++) {
459       d = PetscAbsScalar((a->a)[ddiag[i]]);
460       /* calculate amt of shift needed for this row */
461       if (d<=0) {
462         row_shift = 0;
463       } else {
464         row_shift = -2*d;
465       }
466       v = a->a+aai[i];
467       for (j=0; j<aai[i+1]-aai[i]; j++)
468 	row_shift += PetscAbsScalar(v[j]);
469       if (row_shift>shift_top) shift_top = row_shift;
470     }
471   }
472 
473   shift_fraction = 0; shift_amount = 0;
474   do {
475     damp    = PETSC_FALSE;
476     lushift = PETSC_FALSE;
477     for (i=0; i<n; i++) {
478       nz    = ai[i+1] - ai[i];
479       ajtmp = aj + ai[i];
480       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
481 
482       /* load in initial (unfactored row) */
483       nz       = a->i[r[i]+1] - a->i[r[i]];
484       ajtmpold = a->j + a->i[r[i]];
485       v        = a->a + a->i[r[i]];
486       for (j=0; j<nz; j++) {
487         rtmp[ics[ajtmpold[j]]] = v[j];
488       }
489       rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */
490 
491       row = *ajtmp++ ;
492       while  (row < i) {
493         pc = rtmp + row;
494         if (*pc != 0.0) {
495           pv         = b->a + diag_offset[row];
496           pj         = b->j + diag_offset[row] + 1;
497           multiplier = *pc / *pv++;
498           *pc        = multiplier;
499           nz         = ai[row+1] - diag_offset[row] - 1;
500           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
501           PetscLogFlops(2*nz);
502         }
503         row = *ajtmp++;
504       }
505       /* finished row so stick it into b->a */
506       pv   = b->a + ai[i] ;
507       pj   = b->j + ai[i] ;
508       nz   = ai[i+1] - ai[i];
509       diag = diag_offset[i] - ai[i];
510       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
511       rs   = 0.0;
512       for (j=0; j<nz; j++) {
513         pv[j] = rtmps[pj[j]];
514         if (j != diag) rs += PetscAbsScalar(pv[j]);
515       }
516 #define MAX_NSHIFT 5
517       if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) {
518 	if (nshift>MAX_NSHIFT) {
519 	  SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner");
520 	} else if (nshift==MAX_NSHIFT) {
521 	  shift_fraction = shift_hi;
522 	  lushift      = PETSC_FALSE;
523 	} else {
524 	  shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.;
525 	  lushift      = PETSC_TRUE;
526 	}
527 	shift_amount = shift_fraction * shift_top;
528         nshift++;
529         break;
530       }
531       if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) {
532         if (damping) {
533           if (ndamp) damping *= 2.0;
534           damp = PETSC_TRUE;
535           ndamp++;
536           break;
537         } else {
538           SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
539         }
540       }
541     }
542     if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) {
543       /*
544        * if no shift in this attempt & shifting & started shifting & can refine,
545        * then try lower shift
546        */
547       shift_hi       = shift_fraction;
548       shift_fraction = (shift_hi+shift_lo)/2.;
549       shift_amount   = shift_fraction * shift_top;
550       lushift        = PETSC_TRUE;
551       nshift++;
552     }
553   } while (damp || lushift);
554 
555   /* invert diagonal entries for simplier triangular solves */
556   for (i=0; i<n; i++) {
557     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
558   }
559 
560   ierr = PetscFree(rtmp);CHKERRQ(ierr);
561   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
562   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
563   C->factor = FACTOR_LU;
564   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
565   C->assembled = PETSC_TRUE;
566   PetscLogFlops(C->n);
567   if (ndamp) {
568     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping);
569   }
570   if (nshift) {
571     b->lu_shift_fraction = shift_fraction;
572     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift);
573   }
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
579 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
580 {
581   PetscFunctionBegin;
582   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
583   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
584   PetscFunctionReturn(0);
585 }
586 
587 
588 /* ----------------------------------------------------------- */
589 #undef __FUNCT__
590 #define __FUNCT__ "MatLUFactor_SeqAIJ"
591 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
592 {
593   PetscErrorCode ierr;
594   Mat            C;
595 
596   PetscFunctionBegin;
597   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
598   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
599   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
600   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
601   PetscFunctionReturn(0);
602 }
603 /* ----------------------------------------------------------- */
604 #undef __FUNCT__
605 #define __FUNCT__ "MatSolve_SeqAIJ"
606 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
607 {
608   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
609   IS             iscol = a->col,isrow = a->row;
610   PetscErrorCode ierr;
611   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
612   PetscInt       nz,*rout,*cout;
613   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
614 
615   PetscFunctionBegin;
616   if (!n) PetscFunctionReturn(0);
617 
618   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
619   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
620   tmp  = a->solve_work;
621 
622   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
623   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
624 
625   /* forward solve the lower triangular */
626   tmp[0] = b[*r++];
627   tmps   = tmp;
628   for (i=1; i<n; i++) {
629     v   = aa + ai[i] ;
630     vi  = aj + ai[i] ;
631     nz  = a->diag[i] - ai[i];
632     sum = b[*r++];
633     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
634     tmp[i] = sum;
635   }
636 
637   /* backward solve the upper triangular */
638   for (i=n-1; i>=0; i--){
639     v   = aa + a->diag[i] + 1;
640     vi  = aj + a->diag[i] + 1;
641     nz  = ai[i+1] - a->diag[i] - 1;
642     sum = tmp[i];
643     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
644     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
645   }
646 
647   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
648   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
649   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
650   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
651   PetscLogFlops(2*a->nz - A->n);
652   PetscFunctionReturn(0);
653 }
654 
655 /* ----------------------------------------------------------- */
656 #undef __FUNCT__
657 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
658 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
659 {
660   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
661   PetscErrorCode ierr;
662   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
663   PetscScalar    *x,*b,*aa = a->a;
664 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
665   PetscInt       adiag_i,i,*vi,nz,ai_i;
666   PetscScalar    *v,sum;
667 #endif
668 
669   PetscFunctionBegin;
670   if (!n) PetscFunctionReturn(0);
671 
672   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
673   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
674 
675 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
676   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
677 #else
678   /* forward solve the lower triangular */
679   x[0] = b[0];
680   for (i=1; i<n; i++) {
681     ai_i = ai[i];
682     v    = aa + ai_i;
683     vi   = aj + ai_i;
684     nz   = adiag[i] - ai_i;
685     sum  = b[i];
686     while (nz--) sum -= *v++ * x[*vi++];
687     x[i] = sum;
688   }
689 
690   /* backward solve the upper triangular */
691   for (i=n-1; i>=0; i--){
692     adiag_i = adiag[i];
693     v       = aa + adiag_i + 1;
694     vi      = aj + adiag_i + 1;
695     nz      = ai[i+1] - adiag_i - 1;
696     sum     = x[i];
697     while (nz--) sum -= *v++ * x[*vi++];
698     x[i]    = sum*aa[adiag_i];
699   }
700 #endif
701   PetscLogFlops(2*a->nz - A->n);
702   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
703   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
709 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
710 {
711   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
712   IS             iscol = a->col,isrow = a->row;
713   PetscErrorCode ierr;
714   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
715   PetscInt       nz,*rout,*cout;
716   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
717 
718   PetscFunctionBegin;
719   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
720 
721   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
722   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
723   tmp  = a->solve_work;
724 
725   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
726   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
727 
728   /* forward solve the lower triangular */
729   tmp[0] = b[*r++];
730   for (i=1; i<n; i++) {
731     v   = aa + ai[i] ;
732     vi  = aj + ai[i] ;
733     nz  = a->diag[i] - ai[i];
734     sum = b[*r++];
735     while (nz--) sum -= *v++ * tmp[*vi++ ];
736     tmp[i] = sum;
737   }
738 
739   /* backward solve the upper triangular */
740   for (i=n-1; i>=0; i--){
741     v   = aa + a->diag[i] + 1;
742     vi  = aj + a->diag[i] + 1;
743     nz  = ai[i+1] - a->diag[i] - 1;
744     sum = tmp[i];
745     while (nz--) sum -= *v++ * tmp[*vi++ ];
746     tmp[i] = sum*aa[a->diag[i]];
747     x[*c--] += tmp[i];
748   }
749 
750   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
751   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
752   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
753   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
754   PetscLogFlops(2*a->nz);
755 
756   PetscFunctionReturn(0);
757 }
758 /* -------------------------------------------------------------------*/
759 #undef __FUNCT__
760 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
761 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
762 {
763   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
764   IS             iscol = a->col,isrow = a->row;
765   PetscErrorCode ierr;
766   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
767   PetscInt       nz,*rout,*cout,*diag = a->diag;
768   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
769 
770   PetscFunctionBegin;
771   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
772   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
773   tmp  = a->solve_work;
774 
775   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
776   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
777 
778   /* copy the b into temp work space according to permutation */
779   for (i=0; i<n; i++) tmp[i] = b[c[i]];
780 
781   /* forward solve the U^T */
782   for (i=0; i<n; i++) {
783     v   = aa + diag[i] ;
784     vi  = aj + diag[i] + 1;
785     nz  = ai[i+1] - diag[i] - 1;
786     s1  = tmp[i];
787     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
788     while (nz--) {
789       tmp[*vi++ ] -= (*v++)*s1;
790     }
791     tmp[i] = s1;
792   }
793 
794   /* backward solve the L^T */
795   for (i=n-1; i>=0; i--){
796     v   = aa + diag[i] - 1 ;
797     vi  = aj + diag[i] - 1 ;
798     nz  = diag[i] - ai[i];
799     s1  = tmp[i];
800     while (nz--) {
801       tmp[*vi-- ] -= (*v--)*s1;
802     }
803   }
804 
805   /* copy tmp into x according to permutation */
806   for (i=0; i<n; i++) x[r[i]] = tmp[i];
807 
808   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
809   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
810   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
811   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
812 
813   PetscLogFlops(2*a->nz-A->n);
814   PetscFunctionReturn(0);
815 }
816 
817 #undef __FUNCT__
818 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
819 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
820 {
821   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
822   IS             iscol = a->col,isrow = a->row;
823   PetscErrorCode ierr;
824   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
825   PetscInt       nz,*rout,*cout,*diag = a->diag;
826   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
827 
828   PetscFunctionBegin;
829   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
830 
831   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
832   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
833   tmp = a->solve_work;
834 
835   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
836   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
837 
838   /* copy the b into temp work space according to permutation */
839   for (i=0; i<n; i++) tmp[i] = b[c[i]];
840 
841   /* forward solve the U^T */
842   for (i=0; i<n; i++) {
843     v   = aa + diag[i] ;
844     vi  = aj + diag[i] + 1;
845     nz  = ai[i+1] - diag[i] - 1;
846     tmp[i] *= *v++;
847     while (nz--) {
848       tmp[*vi++ ] -= (*v++)*tmp[i];
849     }
850   }
851 
852   /* backward solve the L^T */
853   for (i=n-1; i>=0; i--){
854     v   = aa + diag[i] - 1 ;
855     vi  = aj + diag[i] - 1 ;
856     nz  = diag[i] - ai[i];
857     while (nz--) {
858       tmp[*vi-- ] -= (*v--)*tmp[i];
859     }
860   }
861 
862   /* copy tmp into x according to permutation */
863   for (i=0; i<n; i++) x[r[i]] += tmp[i];
864 
865   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
866   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
867   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
868   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
869 
870   PetscLogFlops(2*a->nz);
871   PetscFunctionReturn(0);
872 }
873 /* ----------------------------------------------------------------*/
874 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
875 
876 #undef __FUNCT__
877 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
878 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
879 {
880   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
881   IS             isicol;
882   PetscErrorCode ierr;
883   PetscInt       *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
884   PetscInt       *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
885   PetscInt       *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
886   PetscInt       incrlev,nnz,i,levels,diagonal_fill;
887   PetscTruth     col_identity,row_identity;
888   PetscReal      f;
889 
890   PetscFunctionBegin;
891   f             = info->fill;
892   levels        = (PetscInt)info->levels;
893   diagonal_fill = (PetscInt)info->diagonal_fill;
894   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
895 
896   /* special case that simply copies fill pattern */
897   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
898   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
899   if (!levels && row_identity && col_identity) {
900     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
901     (*fact)->factor = FACTOR_LU;
902     b               = (Mat_SeqAIJ*)(*fact)->data;
903     if (!b->diag) {
904       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
905     }
906     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
907     b->row              = isrow;
908     b->col              = iscol;
909     b->icol             = isicol;
910     b->lu_damping       = info->damping;
911     b->lu_zeropivot     = info->zeropivot;
912     b->lu_shift         = info->shift;
913     b->lu_shift_fraction= info->shift_fraction;
914     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
915     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
916     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
917     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
918     PetscFunctionReturn(0);
919   }
920 
921   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
922   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
923 
924   /* get new row pointers */
925   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
926   ainew[0] = 0;
927   /* don't know how many column pointers are needed so estimate */
928   jmax = (PetscInt)(f*(ai[n]+1));
929   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
930   /* ajfill is level of fill for each fill entry */
931   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr);
932   /* fill is a linked list of nonzeros in active row */
933   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
934   /* im is level for each filled value */
935   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
936   /* dloc is location of diagonal in factor */
937   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr);
938   dloc[0]  = 0;
939   for (prow=0; prow<n; prow++) {
940 
941     /* copy current row into linked list */
942     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
943     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
944     xi      = aj + ai[r[prow]] ;
945     fill[n]    = n;
946     fill[prow] = -1; /* marker to indicate if diagonal exists */
947     while (nz--) {
948       fm  = n;
949       idx = ic[*xi++ ];
950       do {
951         m  = fm;
952         fm = fill[m];
953       } while (fm < idx);
954       fill[m]   = idx;
955       fill[idx] = fm;
956       im[idx]   = 0;
957     }
958 
959     /* make sure diagonal entry is included */
960     if (diagonal_fill && fill[prow] == -1) {
961       fm = n;
962       while (fill[fm] < prow) fm = fill[fm];
963       fill[prow] = fill[fm]; /* insert diagonal into linked list */
964       fill[fm]   = prow;
965       im[prow]   = 0;
966       nzf++;
967       dcount++;
968     }
969 
970     nzi = 0;
971     row = fill[n];
972     while (row < prow) {
973       incrlev = im[row] + 1;
974       nz      = dloc[row];
975       xi      = ajnew  + ainew[row]  + nz + 1;
976       flev    = ajfill + ainew[row]  + nz + 1;
977       nnz     = ainew[row+1] - ainew[row] - nz - 1;
978       fm      = row;
979       while (nnz-- > 0) {
980         idx = *xi++ ;
981         if (*flev + incrlev > levels) {
982           flev++;
983           continue;
984         }
985         do {
986           m  = fm;
987           fm = fill[m];
988         } while (fm < idx);
989         if (fm != idx) {
990           im[idx]   = *flev + incrlev;
991           fill[m]   = idx;
992           fill[idx] = fm;
993           fm        = idx;
994           nzf++;
995         } else {
996           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
997         }
998         flev++;
999       }
1000       row = fill[row];
1001       nzi++;
1002     }
1003     /* copy new filled row into permanent storage */
1004     ainew[prow+1] = ainew[prow] + nzf;
1005     if (ainew[prow+1] > jmax) {
1006 
1007       /* estimate how much additional space we will need */
1008       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1009       /* just double the memory each time */
1010       /*  maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1011       PetscInt maxadd = jmax;
1012       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1013       jmax += maxadd;
1014 
1015       /* allocate a longer ajnew and ajfill */
1016       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1017       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1018       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1019       ajnew  = xi;
1020       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1021       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1022       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1023       ajfill = xi;
1024       reallocs++; /* count how many times we realloc */
1025     }
1026     xi          = ajnew + ainew[prow] ;
1027     flev        = ajfill + ainew[prow] ;
1028     dloc[prow]  = nzi;
1029     fm          = fill[n];
1030     while (nzf--) {
1031       *xi++   = fm ;
1032       *flev++ = im[fm];
1033       fm      = fill[fm];
1034     }
1035     /* make sure row has diagonal entry */
1036     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1037       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1038     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1039     }
1040   }
1041   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1042   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1043   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1044   ierr = PetscFree(fill);CHKERRQ(ierr);
1045   ierr = PetscFree(im);CHKERRQ(ierr);
1046 
1047   {
1048     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1049     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1050     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1051     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1052     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1053     if (diagonal_fill) {
1054       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1055     }
1056   }
1057 
1058   /* put together the new matrix */
1059   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1060   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1061   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1062   PetscLogObjectParent(*fact,isicol);
1063   b = (Mat_SeqAIJ*)(*fact)->data;
1064   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1065   b->singlemalloc = PETSC_FALSE;
1066   len = (ainew[n] )*sizeof(PetscScalar);
1067   /* the next line frees the default space generated by the Create() */
1068   ierr = PetscFree(b->a);CHKERRQ(ierr);
1069   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1070   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1071   b->j          = ajnew;
1072   b->i          = ainew;
1073   for (i=0; i<n; i++) dloc[i] += ainew[i];
1074   b->diag       = dloc;
1075   b->ilen       = 0;
1076   b->imax       = 0;
1077   b->row        = isrow;
1078   b->col        = iscol;
1079   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1080   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1081   b->icol       = isicol;
1082   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1083   /* In b structure:  Free imax, ilen, old a, old j.
1084      Allocate dloc, solve_work, new a, new j */
1085   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1086   b->maxnz             = b->nz = ainew[n] ;
1087   b->lu_damping        = info->damping;
1088   b->lu_shift          = info->shift;
1089   b->lu_shift_fraction = info->shift_fraction;
1090   b->lu_zeropivot = info->zeropivot;
1091   (*fact)->factor = FACTOR_LU;
1092   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1093   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1094 
1095   (*fact)->info.factor_mallocs    = reallocs;
1096   (*fact)->info.fill_ratio_given  = f;
1097   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #include "src/mat/impls/sbaij/seq/sbaij.h"
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1104 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *B)
1105 {
1106   Mat            C = *B;
1107   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1108   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1109   IS             ip=b->row;
1110   PetscErrorCode ierr;
1111   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1112   PetscInt       *ai=a->i,*aj=a->j;
1113   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1114   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1115   PetscReal      damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount;
1116   PetscTruth     damp,chshift;
1117   PetscInt       nshift=0,ndamp=0;
1118 
1119   PetscFunctionBegin;
1120   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1121 
1122   /* initialization */
1123   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1124   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1125   jl   = il + mbs;
1126   rtmp = (MatScalar*)(jl + mbs);
1127 
1128   shift_amount = 0;
1129   do {
1130     damp = PETSC_FALSE;
1131     chshift = PETSC_FALSE;
1132     for (i=0; i<mbs; i++) {
1133       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1134     }
1135 
1136     for (k = 0; k<mbs; k++){
1137       bval = ba + bi[k];
1138       /* initialize k-th row by the perm[k]-th row of A */
1139       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1140       for (j = jmin; j < jmax; j++){
1141         col = rip[aj[j]];
1142         if (col >= k){ /* only take upper triangular entry */
1143           rtmp[col] = aa[j];
1144           *bval++  = 0.0; /* for in-place factorization */
1145         }
1146       }
1147       /* damp the diagonal of the matrix */
1148       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
1149 
1150       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1151       dk = rtmp[k];
1152       i = jl[k]; /* first row to be added to k_th row  */
1153 
1154       while (i < k){
1155         nexti = jl[i]; /* next row to be added to k_th row */
1156 
1157         /* compute multiplier, update diag(k) and U(i,k) */
1158         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1159         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1160         dk += uikdi*ba[ili];
1161         ba[ili] = uikdi; /* -U(i,k) */
1162 
1163         /* add multiple of row i to k-th row */
1164         jmin = ili + 1; jmax = bi[i+1];
1165         if (jmin < jmax){
1166           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1167           /* update il and jl for row i */
1168           il[i] = jmin;
1169           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1170         }
1171         i = nexti;
1172       }
1173 
1174       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
1175 	/* calculate a shift that would make this row diagonally dominant */
1176 	PetscReal rs = PetscAbs(PetscRealPart(dk));
1177 	jmin      = bi[k]+1;
1178 	nz        = bi[k+1] - jmin;
1179 	if (nz){
1180 	  bcol = bj + jmin;
1181 	  bval = ba + jmin;
1182 	  while (nz--){
1183 	    rs += PetscAbsScalar(rtmp[*bcol++]);
1184 	  }
1185 	}
1186 	/* if this shift is less than the previous, just up the previous
1187 	   one by a bit */
1188 	shift_amount = PetscMax(rs,1.1*shift_amount);
1189 	chshift  = PETSC_TRUE;
1190 	/* Unlike in the ILU case there is no exit condition on nshift:
1191 	   we increase the shift until it converges. There is no guarantee that
1192 	   this algorithm converges faster or slower, or is better or worse
1193 	   than the ILU algorithm. */
1194 	nshift++;
1195 	break;
1196       }
1197       if (PetscRealPart(dk) < zeropivot){
1198         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
1199         if (damping > 0.0) {
1200           if (ndamp) damping *= 2.0;
1201           damp = PETSC_TRUE;
1202           ndamp++;
1203           break;
1204         } else if (PetscAbsScalar(dk) < zeropivot){
1205           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
1206         } else {
1207           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
1208         }
1209       }
1210 
1211       /* copy data into U(k,:) */
1212       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1213       jmin = bi[k]+1; jmax = bi[k+1];
1214       if (jmin < jmax) {
1215         for (j=jmin; j<jmax; j++){
1216           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1217         }
1218         /* add the k-th row into il and jl */
1219         il[k] = jmin;
1220         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1221       }
1222     }
1223   } while (damp||chshift);
1224   ierr = PetscFree(il);CHKERRQ(ierr);
1225 
1226   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1227   C->factor       = FACTOR_CHOLESKY;
1228   C->assembled    = PETSC_TRUE;
1229   C->preallocated = PETSC_TRUE;
1230   PetscLogFlops(C->m);
1231   if (ndamp) {
1232     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping);
1233   }
1234   if (nshift) {
1235     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ diagonal shifted %D shifts\n",nshift);
1236   }
1237   PetscFunctionReturn(0);
1238 }
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering"
1241 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering(Mat A,Mat *fact)
1242 {
1243   Mat            C = *fact;
1244   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1245   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1246   PetscErrorCode ierr;
1247   PetscInt       i,j,am = A->m;
1248   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1249   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
1250   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1251   PetscReal      damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
1252   PetscTruth     damp,chshift;
1253   PetscInt       nshift=0;
1254 
1255   PetscFunctionBegin;
1256   /* initialization */
1257   nz   = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar);
1258   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1259   jl   = il + am;
1260   rtmp = (MatScalar*)(jl + am);
1261 
1262   shift_amount = 0;
1263   do {
1264     damp = PETSC_FALSE;
1265     chshift = PETSC_FALSE;
1266     for (i=0; i<am; i++) {
1267       rtmp[i] = 0.0; jl[i] = am; il[0] = 0;
1268     }
1269 
1270     for (k = 0; k<am; k++){
1271     /* initialize k-th row with elements nonzero in row perm(k) of A */
1272       nz   = ai[k+1] - ai[k];
1273       acol = aj + ai[k];
1274       aval = aa + ai[k];
1275       bval = ba + bi[k];
1276       while (nz -- ){
1277         if (*acol < k) { /* skip lower triangular entries */
1278           acol++; aval++;
1279         } else {
1280           rtmp[*acol++] = *aval++;
1281           *bval++       = 0.0; /* for in-place factorization */
1282         }
1283       }
1284       /* damp the diagonal of the matrix */
1285       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
1286 
1287       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1288       dk = rtmp[k];
1289       i  = jl[k]; /* first row to be added to k_th row  */
1290 
1291       while (i < k){
1292         nexti = jl[i]; /* next row to be added to k_th row */
1293         /* compute multiplier, update D(k) and U(i,k) */
1294         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1295         uikdi = - ba[ili]*ba[bi[i]];
1296         dk   += uikdi*ba[ili];
1297         ba[ili] = uikdi; /* -U(i,k) */
1298 
1299         /* add multiple of row i to k-th row ... */
1300         jmin = ili + 1;
1301         nz   = bi[i+1] - jmin;
1302         if (nz > 0){
1303           bcol = bj + jmin;
1304           bval = ba + jmin;
1305           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1306           /* update il and jl for i-th row */
1307           il[i] = jmin;
1308           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1309         }
1310         i = nexti;
1311       }
1312 
1313       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
1314 	/* calculate a shift that would make this row diagonally dominant */
1315 	PetscReal rs = PetscAbs(PetscRealPart(dk));
1316 	jmin      = bi[k]+1;
1317 	nz        = bi[k+1] - jmin;
1318 	if (nz){
1319 	  bcol = bj + jmin;
1320 	  bval = ba + jmin;
1321 	  while (nz--){
1322 	    rs += PetscAbsScalar(rtmp[*bcol++]);
1323 	  }
1324 	}
1325 	/* if this shift is less than the previous, just up the previous
1326 	   one by a bit */
1327 	shift_amount = PetscMax(rs,1.1*shift_amount);
1328 	chshift  = PETSC_TRUE;
1329 	/* Unlike in the ILU case there is no exit condition on nshift:
1330 	   we increase the shift until it converges. There is no guarantee that
1331 	   this algorithm converges faster or slower, or is better or worse
1332 	   than the ILU algorithm. */
1333 	nshift++;
1334 	break;
1335       }
1336       if (PetscRealPart(dk) < zeropivot){
1337         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
1338         if (damping > 0.0) {
1339           if (ndamp) damping *= 2.0;
1340           damp = PETSC_TRUE;
1341           ndamp++;
1342           break;
1343         } else if (PetscAbsScalar(dk) < zeropivot){
1344           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
1345         } else {
1346           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
1347         }
1348       }
1349 
1350       /* copy data into U(k,:) */
1351       ba[bi[k]] = 1.0/dk;
1352       jmin      = bi[k]+1;
1353       nz        = bi[k+1] - jmin;
1354       if (nz){
1355         bcol = bj + jmin;
1356         bval = ba + jmin;
1357         while (nz--){
1358           *bval++       = rtmp[*bcol];
1359           rtmp[*bcol++] = 0.0;
1360         }
1361         /* add k-th row into il and jl */
1362         il[k] = jmin;
1363         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1364       }
1365     }
1366   } while (damp||chshift);
1367   ierr = PetscFree(il);CHKERRQ(ierr);
1368 
1369   C->factor       = FACTOR_CHOLESKY;
1370   C->assembled    = PETSC_TRUE;
1371   C->preallocated = PETSC_TRUE;
1372   PetscLogFlops(C->m);
1373   if (ndamp) {
1374     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqAIJ_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping);
1375   }
1376   if (nshift) {
1377     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering diagonal shifted %D shifts\n",nshift);
1378   }
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 #include "petscbt.h"
1383 #include "src/mat/utils/freespace.h"
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1386 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1387 {
1388   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1389   Mat_SeqSBAIJ   *b;
1390   Mat            B;
1391   PetscErrorCode ierr;
1392   PetscTruth     perm_identity;
1393   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1394   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1395   PetscInt       nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1396   PetscReal      fill=info->fill,levels=info->levels;
1397   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1398   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1399   PetscBT        lnkbt;
1400 
1401   PetscFunctionBegin;
1402   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1403   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1404 
1405   /* special case that simply copies fill pattern */
1406   if (!levels && perm_identity) {
1407     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1408     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1409     for (i=0; i<am; i++) {
1410       ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */
1411     }
1412     ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1413     B = *fact;
1414     ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1415     ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr);
1416 
1417     b  = (Mat_SeqSBAIJ*)B->data;
1418     uj = b->j;
1419     for (i=0; i<am; i++) {
1420       aj = a->j + a->diag[i];
1421       for (j=0; j<ui[i]; j++){
1422         *uj++ = *aj++;
1423       }
1424       b->ilen[i] = ui[i];
1425     }
1426     ierr = PetscFree(ui);CHKERRQ(ierr);
1427     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1428     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1429 
1430     B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1431     B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1432     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
1433     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering;
1434     PetscFunctionReturn(0);
1435   }
1436 
1437   /* initialization */
1438   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1439   ui[0] = 0;
1440   ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
1441 
1442   /* jl: linked list for storing indices of the pivot rows
1443      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1444   ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1445   il         = jl + am;
1446   uj_ptr     = (PetscInt**)(il + am);
1447   uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1448   for (i=0; i<am; i++){
1449     jl[i] = am; il[i] = 0;
1450   }
1451 
1452   /* create and initialize a linked list for storing column indices of the active row k */
1453   nlnk = am + 1;
1454   ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1455 
1456   /* initial FreeSpace size is fill*(ai[am]+1) */
1457   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1458   current_space = free_space;
1459   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1460   current_space_lvl = free_space_lvl;
1461 
1462   for (k=0; k<am; k++){  /* for each active row k */
1463     /* initialize lnk by the column indices of row rip[k] of A */
1464     nzk   = 0;
1465     ncols = ai[rip[k]+1] - ai[rip[k]];
1466     ncols_upper = 0;
1467     cols        = cols_lvl + am;
1468     for (j=0; j<ncols; j++){
1469       i = rip[*(aj + ai[rip[k]] + j)];
1470       if (i >= k){ /* only take upper triangular entry */
1471         cols[ncols_upper] = i;
1472         cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
1473         ncols_upper++;
1474       }
1475     }
1476     ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1477     nzk += nlnk;
1478 
1479     /* update lnk by computing fill-in for each pivot row to be merged in */
1480     prow = jl[k]; /* 1st pivot row */
1481 
1482     while (prow < k){
1483       nextprow = jl[prow];
1484 
1485       /* merge prow into k-th row */
1486       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1487       jmax = ui[prow+1];
1488       ncols = jmax-jmin;
1489       i     = jmin - ui[prow];
1490       cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1491       for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1492       ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1493       nzk += nlnk;
1494 
1495       /* update il and jl for prow */
1496       if (jmin < jmax){
1497         il[prow] = jmin;
1498         j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1499       }
1500       prow = nextprow;
1501     }
1502 
1503     /* if free space is not available, make more free space */
1504     if (current_space->local_remaining<nzk) {
1505       i = am - k + 1; /* num of unfactored rows */
1506       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1507       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1508       ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1509       reallocs++;
1510     }
1511 
1512     /* copy data into free_space and free_space_lvl, then initialize lnk */
1513     ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1514 
1515     /* add the k-th row into il and jl */
1516     if (nzk-1 > 0){
1517       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1518       jl[k] = jl[i]; jl[i] = k;
1519       il[k] = ui[k] + 1;
1520     }
1521     uj_ptr[k]     = current_space->array;
1522     uj_lvl_ptr[k] = current_space_lvl->array;
1523 
1524     current_space->array           += nzk;
1525     current_space->local_used      += nzk;
1526     current_space->local_remaining -= nzk;
1527 
1528     current_space_lvl->array           += nzk;
1529     current_space_lvl->local_used      += nzk;
1530     current_space_lvl->local_remaining -= nzk;
1531 
1532     ui[k+1] = ui[k] + nzk;
1533   }
1534 
1535   if (ai[am] != 0) {
1536     PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]);
1537     PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1538     PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1539     PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1540   } else {
1541      PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1542   }
1543 
1544   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1545   ierr = PetscFree(jl);CHKERRQ(ierr);
1546   ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
1547 
1548   /* destroy list of free space and other temporary array(s) */
1549   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1550   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1551   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1552   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1553 
1554   /* put together the new matrix in MATSEQSBAIJ format */
1555   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1556   B = *fact;
1557   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1558   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1559 
1560   b = (Mat_SeqSBAIJ*)B->data;
1561   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1562   b->singlemalloc = PETSC_FALSE;
1563   /* the next line frees the default space generated by the Create() */
1564   ierr = PetscFree(b->a);CHKERRQ(ierr);
1565   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1566   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1567   b->j    = uj;
1568   b->i    = ui;
1569   b->diag = 0;
1570   b->ilen = 0;
1571   b->imax = 0;
1572   b->row  = perm;
1573   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1574   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1575   b->icol = perm;
1576   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1577   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1578   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1579   b->maxnz = b->nz = ui[am];
1580 
1581   B->factor                 = FACTOR_CHOLESKY;
1582   B->info.factor_mallocs    = reallocs;
1583   B->info.fill_ratio_given  = fill;
1584   if (ai[am] != 0) {
1585     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1586   } else {
1587     B->info.fill_ratio_needed = 0.0;
1588   }
1589   if (perm_identity){
1590     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1591     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1592     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
1593     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering;
1594   } else {
1595     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1596   }
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1602 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1603 {
1604   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1605   Mat_SeqSBAIJ   *b;
1606   Mat            B;
1607   PetscErrorCode ierr;
1608   PetscTruth     perm_identity;
1609   PetscReal      fill = info->fill;
1610   PetscInt       *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1611   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1612   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1613   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1614   PetscBT        lnkbt;
1615   IS             iperm;
1616 
1617   PetscFunctionBegin;
1618   /* check whether perm is the identity mapping */
1619   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1620   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1621 
1622   if (!perm_identity){
1623     /* check if perm is symmetric! */
1624     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1625     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1626     for (i=0; i<mbs; i++) {
1627       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1628     }
1629     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1630     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1631   }
1632 
1633   /* initialization */
1634   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1635   ui[0] = 0;
1636 
1637   /* jl: linked list for storing indices of the pivot rows
1638      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
1639   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1640   il     = jl + mbs;
1641   cols   = il + mbs;
1642   ui_ptr = (PetscInt**)(cols + mbs);
1643   for (i=0; i<mbs; i++){
1644     jl[i] = mbs; il[i] = 0;
1645   }
1646 
1647   /* create and initialize a linked list for storing column indices of the active row k */
1648   nlnk = mbs + 1;
1649   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1650 
1651   /* initial FreeSpace size is fill*(ai[mbs]+1) */
1652   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
1653   current_space = free_space;
1654 
1655   for (k=0; k<mbs; k++){  /* for each active row k */
1656     /* initialize lnk by the column indices of row rip[k] of A */
1657     nzk   = 0;
1658     ncols = ai[rip[k]+1] - ai[rip[k]];
1659     ncols_upper = 0;
1660     for (j=0; j<ncols; j++){
1661       i = rip[*(aj + ai[rip[k]] + j)];
1662       if (i >= k){ /* only take upper triangular entry */
1663         cols[ncols_upper] = i;
1664         ncols_upper++;
1665       }
1666     }
1667     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1668     nzk += nlnk;
1669 
1670     /* update lnk by computing fill-in for each pivot row to be merged in */
1671     prow = jl[k]; /* 1st pivot row */
1672 
1673     while (prow < k){
1674       nextprow = jl[prow];
1675       /* merge prow into k-th row */
1676       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
1677       jmax = ui[prow+1];
1678       ncols = jmax-jmin;
1679       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
1680       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1681       nzk += nlnk;
1682 
1683       /* update il and jl for prow */
1684       if (jmin < jmax){
1685         il[prow] = jmin;
1686         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1687       }
1688       prow = nextprow;
1689     }
1690 
1691     /* if free space is not available, make more free space */
1692     if (current_space->local_remaining<nzk) {
1693       i = mbs - k + 1; /* num of unfactored rows */
1694       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1695       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1696       reallocs++;
1697     }
1698 
1699     /* copy data into free space, then initialize lnk */
1700     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1701 
1702     /* add the k-th row into il and jl */
1703     if (nzk-1 > 0){
1704       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1705       jl[k] = jl[i]; jl[i] = k;
1706       il[k] = ui[k] + 1;
1707     }
1708     ui_ptr[k] = current_space->array;
1709     current_space->array           += nzk;
1710     current_space->local_used      += nzk;
1711     current_space->local_remaining -= nzk;
1712 
1713     ui[k+1] = ui[k] + nzk;
1714   }
1715 
1716   if (ai[mbs] != 0) {
1717     PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]);
1718     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1719     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1720     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1721   } else {
1722      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1723   }
1724 
1725   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1726   ierr = PetscFree(jl);CHKERRQ(ierr);
1727 
1728   /* destroy list of free space and other temporary array(s) */
1729   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1730   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1731   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1732 
1733   /* put together the new matrix in MATSEQSBAIJ format */
1734   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
1735   B    = *fact;
1736   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1737   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1738 
1739   b = (Mat_SeqSBAIJ*)B->data;
1740   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1741   b->singlemalloc = PETSC_FALSE;
1742   /* the next line frees the default space generated by the Create() */
1743   ierr = PetscFree(b->a);CHKERRQ(ierr);
1744   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1745   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1746   b->j    = uj;
1747   b->i    = ui;
1748   b->diag = 0;
1749   b->ilen = 0;
1750   b->imax = 0;
1751   b->row  = perm;
1752   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1753   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1754   b->icol = perm;
1755   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1756   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1757   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
1758   b->maxnz = b->nz = ui[mbs];
1759 
1760   B->factor                 = FACTOR_CHOLESKY;
1761   B->info.factor_mallocs    = reallocs;
1762   B->info.fill_ratio_given  = fill;
1763   if (ai[mbs] != 0) {
1764     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
1765   } else {
1766     B->info.fill_ratio_needed = 0.0;
1767   }
1768   if (perm_identity){
1769     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1770     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1771     ierr = PetscObjectComposeFunction((PetscObject)(*fact),"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr);
1772     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering;
1773   } else {
1774     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1775   }
1776   PetscFunctionReturn(0);
1777 }
1778