xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 9ee9b77b2cca71d2a08ebc06fc4da7cd72e3025a)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9 {
10   PetscFunctionBegin;
11 
12   SETERRQ(PETSC_ERR_SUP,"Code not written");
13 #if !defined(PETSC_USE_DEBUG)
14   PetscFunctionReturn(0);
15 #endif
16 }
17 
18 
19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
21 
22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
28   /* ------------------------------------------------------------
29 
30           This interface was contribed by Tony Caola
31 
32      This routine is an interface to the pivoting drop-tolerance
33      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34      SPARSEKIT2.
35 
36      The SPARSEKIT2 routines used here are covered by the GNU
37      copyright; see the file gnu in this directory.
38 
39      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40      help in getting this routine ironed out.
41 
42      The major drawback to this routine is that if info->fill is
43      not large enough it fails rather than allocating more space;
44      this can be fixed by hacking/improving the f2c version of
45      Yousef Saad's code.
46 
47      ------------------------------------------------------------
48 */
49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50 {
51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
52   PetscFunctionBegin;
53   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
54   You can obtain the drop tolerance routines by installing PETSc from\n\
55   www.mcs.anl.gov/petsc\n");
56 #else
57   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
58   IS             iscolf,isicol,isirow;
59   PetscTruth     reorder;
60   PetscErrorCode ierr,sierr;
61   PetscInt       *c,*r,*ic,i,n = A->m;
62   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63   PetscInt       *ordcol,*iwk,*iperm,*jw;
64   PetscInt       jmax,lfill,job,*o_i,*o_j;
65   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66   PetscReal      af;
67 
68   PetscFunctionBegin;
69 
70   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
71   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
72   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
73   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
74   lfill   = (PetscInt)(info->dtcount/2.0);
75   jmax    = (PetscInt)(info->fill*a->nz);
76 
77 
78   /* ------------------------------------------------------------
79      If reorder=.TRUE., then the original matrix has to be
80      reordered to reflect the user selected ordering scheme, and
81      then de-reordered so it is in it's original format.
82      Because Saad's dperm() is NOT in place, we have to copy
83      the original matrix and allocate more storage. . .
84      ------------------------------------------------------------
85   */
86 
87   /* set reorder to true if either isrow or iscol is not identity */
88   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
89   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
90   reorder = PetscNot(reorder);
91 
92 
93   /* storage for ilu factor */
94   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
95   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
97   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
98 
99   /* ------------------------------------------------------------
100      Make sure that everything is Fortran formatted (1-Based)
101      ------------------------------------------------------------
102   */
103   for (i=old_i[0];i<old_i[n];i++) {
104     old_j[i]++;
105   }
106   for(i=0;i<n+1;i++) {
107     old_i[i]++;
108   };
109 
110 
111   if (reorder) {
112     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
113     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
114     for(i=0;i<n;i++) {
115       r[i]  = r[i]+1;
116       c[i]  = c[i]+1;
117     }
118     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
119     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
121     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
122     for (i=0;i<n;i++) {
123       r[i]  = r[i]-1;
124       c[i]  = c[i]-1;
125     }
126     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
127     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
128     o_a = old_a2;
129     o_j = old_j2;
130     o_i = old_i2;
131   } else {
132     o_a = old_a;
133     o_j = old_j;
134     o_i = old_i;
135   }
136 
137   /* ------------------------------------------------------------
138      Call Saad's ilutp() routine to generate the factorization
139      ------------------------------------------------------------
140   */
141 
142   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
144   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
145 
146   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
147   if (sierr) {
148     switch (sierr) {
149       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
150       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
152       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
153       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
154       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
155     }
156   }
157 
158   ierr = PetscFree(w);CHKERRQ(ierr);
159   ierr = PetscFree(jw);CHKERRQ(ierr);
160 
161   /* ------------------------------------------------------------
162      Saad's routine gives the result in Modified Sparse Row (msr)
163      Convert to Compressed Sparse Row format (csr)
164      ------------------------------------------------------------
165   */
166 
167   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
168   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
169 
170   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
171 
172   ierr = PetscFree(iwk);CHKERRQ(ierr);
173   ierr = PetscFree(wk);CHKERRQ(ierr);
174 
175   if (reorder) {
176     ierr = PetscFree(old_a2);CHKERRQ(ierr);
177     ierr = PetscFree(old_j2);CHKERRQ(ierr);
178     ierr = PetscFree(old_i2);CHKERRQ(ierr);
179   } else {
180     /* fix permutation of old_j that the factorization introduced */
181     for (i=old_i[0]; i<old_i[n]; i++) {
182       old_j[i-1] = iperm[old_j[i-1]-1];
183     }
184   }
185 
186   /* get rid of the shift to indices starting at 1 */
187   for (i=0; i<n+1; i++) {
188     old_i[i]--;
189   }
190   for (i=old_i[0];i<old_i[n];i++) {
191     old_j[i]--;
192   }
193 
194   /* Make the factored matrix 0-based */
195   for (i=0; i<n+1; i++) {
196     new_i[i]--;
197   }
198   for (i=new_i[0];i<new_i[n];i++) {
199     new_j[i]--;
200   }
201 
202   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203   /*-- permute the right-hand-side and solution vectors           --*/
204   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
205   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
206   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
207   for(i=0; i<n; i++) {
208     ordcol[i] = ic[iperm[i]-1];
209   };
210   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
211   ierr = ISDestroy(isicol);CHKERRQ(ierr);
212 
213   ierr = PetscFree(iperm);CHKERRQ(ierr);
214 
215   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
216   ierr = PetscFree(ordcol);CHKERRQ(ierr);
217 
218   /*----- put together the new matrix -----*/
219 
220   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
221   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
222   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
223   (*fact)->factor    = FACTOR_LU;
224   (*fact)->assembled = PETSC_TRUE;
225 
226   b = (Mat_SeqAIJ*)(*fact)->data;
227   ierr = PetscFree(b->imax);CHKERRQ(ierr);
228   b->sorted        = PETSC_FALSE;
229   b->singlemalloc  = PETSC_FALSE;
230   /* the next line frees the default space generated by the MatCreate() */
231   ierr             = PetscFree(b->a);CHKERRQ(ierr);
232   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
247 
248   /* check out for identical nodes. If found, use inode functions */
249   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
250 
251   af = ((double)b->nz)/((double)a->nz) + .001;
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 /*
262     Factorization code for AIJ format.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267 {
268   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
269   IS             isicol;
270   PetscErrorCode ierr;
271   PetscInt       *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
272   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
274   PetscReal      f;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
284   ainew[0] = 0;
285   /* don't know how many column pointers are needed so estimate */
286   f = info->fill;
287   jmax  = (PetscInt)(f*ai[n]+1);
288   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
289   /* fill is a linked list of nonzeros in active row */
290   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
291   im   = fill + n + 1;
292   /* idnew is location of diagonal in factor */
293   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
294   idnew[0] = 0;
295 
296   for (i=0; i<n; i++) {
297     /* first copy previous fill into linked list */
298     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
299     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300     ajtmp   = aj + ai[r[i]];
301     fill[n] = n;
302     while (nz--) {
303       fm  = n;
304       idx = ic[*ajtmp++];
305       do {
306         m  = fm;
307         fm = fill[m];
308       } while (fm < idx);
309       fill[m]   = idx;
310       fill[idx] = fm;
311     }
312     row = fill[n];
313     while (row < i) {
314       ajtmp = ajnew + idnew[row] + 1;
315       nzbd  = 1 + idnew[row] - ainew[row];
316       nz    = im[row] - nzbd;
317       fm    = row;
318       while (nz-- > 0) {
319         idx = *ajtmp++ ;
320         nzbd++;
321         if (idx == i) im[row] = nzbd;
322         do {
323           m  = fm;
324           fm = fill[m];
325         } while (fm < idx);
326         if (fm != idx) {
327           fill[m]   = idx;
328           fill[idx] = fm;
329           fm        = idx;
330           nnz++;
331         }
332       }
333       row = fill[row];
334     }
335     /* copy new filled row into permanent storage */
336     ainew[i+1] = ainew[i] + nnz;
337     if (ainew[i+1] > jmax) {
338 
339       /* estimate how much additional space we will need */
340       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
341       /* just double the memory each time */
342       PetscInt maxadd = jmax;
343       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
344       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
345       jmax += maxadd;
346 
347       /* allocate a longer ajnew */
348       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
349       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr);
350       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
351       ajnew = ajtmp;
352       reallocs++; /* count how many times we realloc */
353     }
354     ajtmp = ajnew + ainew[i];
355     fm    = fill[n];
356     nzi   = 0;
357     im[i] = nnz;
358     while (nnz--) {
359       if (fm < i) nzi++;
360       *ajtmp++ = fm ;
361       fm       = fill[fm];
362     }
363     idnew[i] = ainew[i] + nzi;
364   }
365   if (ai[n] != 0) {
366     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
367     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
368     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
369     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
370     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
371   } else {
372     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
373   }
374 
375   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
376   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
377 
378   ierr = PetscFree(fill);CHKERRQ(ierr);
379 
380   /* put together the new matrix */
381   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
382   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
383   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
384   PetscLogObjectParent(*B,isicol);
385   b    = (Mat_SeqAIJ*)(*B)->data;
386   ierr = PetscFree(b->imax);CHKERRQ(ierr);
387   b->singlemalloc = PETSC_FALSE;
388   /* the next line frees the default space generated by the Create() */
389   ierr = PetscFree(b->a);CHKERRQ(ierr);
390   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
391   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
392   b->j          = ajnew;
393   b->i          = ainew;
394   b->diag       = idnew;
395   b->ilen       = 0;
396   b->imax       = 0;
397   b->row        = isrow;
398   b->col        = iscol;
399   b->lu_damping        = info->damping;
400   b->lu_zeropivot      = info->zeropivot;
401   b->lu_shift          = info->shift;
402   b->lu_shift_fraction = info->shift_fraction;
403   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
404   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
405   b->icol       = isicol;
406   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
407   /* In b structure:  Free imax, ilen, old a, old j.
408      Allocate idnew, solve_work, new a, new j */
409   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
410   b->maxnz = b->nz = ainew[n] ;
411 
412   (*B)->factor                 =  FACTOR_LU;
413   (*B)->info.factor_mallocs    = reallocs;
414   (*B)->info.fill_ratio_given  = f;
415   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
416   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
417 
418   if (ai[n] != 0) {
419     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
420   } else {
421     (*B)->info.fill_ratio_needed = 0.0;
422   }
423   PetscFunctionReturn(0);
424 }
425 /* ----------------------------------------------------------- */
426 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
430 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
431 {
432   Mat            C = *B;
433   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
434   IS             isrow = b->row,isicol = b->icol;
435   PetscErrorCode ierr;
436   PetscInt       *r,*ic,i,j,n = A->m,*ai = b->i,*aj = b->j;
437   PetscInt       *ajtmpold,*ajtmp,nz,row,*ics;
438   PetscInt       *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0;
439   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
440   PetscReal      damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d;
441   PetscReal      row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.;
442   PetscTruth     damp,lushift;
443 
444   PetscFunctionBegin;
445   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
446   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
447   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
448   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
449   rtmps = rtmp; ics = ic;
450 
451   if (!a->diag) {
452     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
453   }
454 
455   if (b->lu_shift) { /* set max shift */
456     PetscInt *aai = a->i,*ddiag = a->diag;
457     shift_top = 0;
458     for (i=0; i<n; i++) {
459       d = PetscAbsScalar((a->a)[ddiag[i]]);
460       /* calculate amt of shift needed for this row */
461       if (d<=0) {
462         row_shift = 0;
463       } else {
464         row_shift = -2*d;
465       }
466       v = a->a+aai[i];
467       for (j=0; j<aai[i+1]-aai[i]; j++)
468 	row_shift += PetscAbsScalar(v[j]);
469       if (row_shift>shift_top) shift_top = row_shift;
470     }
471   }
472 
473   shift_fraction = 0; shift_amount = 0;
474   do {
475     damp    = PETSC_FALSE;
476     lushift = PETSC_FALSE;
477     for (i=0; i<n; i++) {
478       nz    = ai[i+1] - ai[i];
479       ajtmp = aj + ai[i];
480       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
481 
482       /* load in initial (unfactored row) */
483       nz       = a->i[r[i]+1] - a->i[r[i]];
484       ajtmpold = a->j + a->i[r[i]];
485       v        = a->a + a->i[r[i]];
486       for (j=0; j<nz; j++) {
487         rtmp[ics[ajtmpold[j]]] = v[j];
488       }
489       rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */
490 
491       row = *ajtmp++ ;
492       while  (row < i) {
493         pc = rtmp + row;
494         if (*pc != 0.0) {
495           pv         = b->a + diag_offset[row];
496           pj         = b->j + diag_offset[row] + 1;
497           multiplier = *pc / *pv++;
498           *pc        = multiplier;
499           nz         = ai[row+1] - diag_offset[row] - 1;
500           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
501           PetscLogFlops(2*nz);
502         }
503         row = *ajtmp++;
504       }
505       /* finished row so stick it into b->a */
506       pv   = b->a + ai[i] ;
507       pj   = b->j + ai[i] ;
508       nz   = ai[i+1] - ai[i];
509       diag = diag_offset[i] - ai[i];
510       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
511       rs   = 0.0;
512       for (j=0; j<nz; j++) {
513         pv[j] = rtmps[pj[j]];
514         if (j != diag) rs += PetscAbsScalar(pv[j]);
515       }
516 #define MAX_NSHIFT 5
517       if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) {
518 	if (nshift>MAX_NSHIFT) {
519 	  SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner");
520 	} else if (nshift==MAX_NSHIFT) {
521 	  shift_fraction = shift_hi;
522 	  lushift      = PETSC_FALSE;
523 	} else {
524 	  shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.;
525 	  lushift      = PETSC_TRUE;
526 	}
527 	shift_amount = shift_fraction * shift_top;
528         nshift++;
529         break;
530       }
531       if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) {
532         if (damping) {
533           if (ndamp) damping *= 2.0;
534           damp = PETSC_TRUE;
535           ndamp++;
536           break;
537         } else {
538           SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
539         }
540       }
541     }
542     if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) {
543       /*
544        * if no shift in this attempt & shifting & started shifting & can refine,
545        * then try lower shift
546        */
547       shift_hi       = shift_fraction;
548       shift_fraction = (shift_hi+shift_lo)/2.;
549       shift_amount   = shift_fraction * shift_top;
550       lushift        = PETSC_TRUE;
551       nshift++;
552     }
553   } while (damp || lushift);
554 
555   /* invert diagonal entries for simplier triangular solves */
556   for (i=0; i<n; i++) {
557     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
558   }
559 
560   ierr = PetscFree(rtmp);CHKERRQ(ierr);
561   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
562   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
563   C->factor = FACTOR_LU;
564   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
565   C->assembled = PETSC_TRUE;
566   PetscLogFlops(C->n);
567   if (ndamp) {
568     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping);
569   }
570   if (nshift) {
571     b->lu_shift_fraction = shift_fraction;
572     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift);
573   }
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
579 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
580 {
581   PetscFunctionBegin;
582   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
583   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
584   PetscFunctionReturn(0);
585 }
586 
587 
588 /* ----------------------------------------------------------- */
589 #undef __FUNCT__
590 #define __FUNCT__ "MatLUFactor_SeqAIJ"
591 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
592 {
593   PetscErrorCode ierr;
594   Mat            C;
595 
596   PetscFunctionBegin;
597   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
598   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
599   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
600   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
601   PetscFunctionReturn(0);
602 }
603 /* ----------------------------------------------------------- */
604 #undef __FUNCT__
605 #define __FUNCT__ "MatSolve_SeqAIJ"
606 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
607 {
608   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
609   IS             iscol = a->col,isrow = a->row;
610   PetscErrorCode ierr;
611   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
612   PetscInt       nz,*rout,*cout;
613   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
614 
615   PetscFunctionBegin;
616   if (!n) PetscFunctionReturn(0);
617 
618   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
619   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
620   tmp  = a->solve_work;
621 
622   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
623   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
624 
625   /* forward solve the lower triangular */
626   tmp[0] = b[*r++];
627   tmps   = tmp;
628   for (i=1; i<n; i++) {
629     v   = aa + ai[i] ;
630     vi  = aj + ai[i] ;
631     nz  = a->diag[i] - ai[i];
632     sum = b[*r++];
633     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
634     tmp[i] = sum;
635   }
636 
637   /* backward solve the upper triangular */
638   for (i=n-1; i>=0; i--){
639     v   = aa + a->diag[i] + 1;
640     vi  = aj + a->diag[i] + 1;
641     nz  = ai[i+1] - a->diag[i] - 1;
642     sum = tmp[i];
643     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
644     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
645   }
646 
647   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
648   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
649   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
650   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
651   PetscLogFlops(2*a->nz - A->n);
652   PetscFunctionReturn(0);
653 }
654 
655 /* ----------------------------------------------------------- */
656 #undef __FUNCT__
657 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
658 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
659 {
660   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
661   PetscErrorCode ierr;
662   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
663   PetscScalar    *x,*b,*aa = a->a;
664 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
665   PetscInt       adiag_i,i,*vi,nz,ai_i;
666   PetscScalar    *v,sum;
667 #endif
668 
669   PetscFunctionBegin;
670   if (!n) PetscFunctionReturn(0);
671 
672   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
673   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
674 
675 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
676   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
677 #else
678   /* forward solve the lower triangular */
679   x[0] = b[0];
680   for (i=1; i<n; i++) {
681     ai_i = ai[i];
682     v    = aa + ai_i;
683     vi   = aj + ai_i;
684     nz   = adiag[i] - ai_i;
685     sum  = b[i];
686     while (nz--) sum -= *v++ * x[*vi++];
687     x[i] = sum;
688   }
689 
690   /* backward solve the upper triangular */
691   for (i=n-1; i>=0; i--){
692     adiag_i = adiag[i];
693     v       = aa + adiag_i + 1;
694     vi      = aj + adiag_i + 1;
695     nz      = ai[i+1] - adiag_i - 1;
696     sum     = x[i];
697     while (nz--) sum -= *v++ * x[*vi++];
698     x[i]    = sum*aa[adiag_i];
699   }
700 #endif
701   PetscLogFlops(2*a->nz - A->n);
702   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
703   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
709 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
710 {
711   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
712   IS             iscol = a->col,isrow = a->row;
713   PetscErrorCode ierr;
714   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
715   PetscInt       nz,*rout,*cout;
716   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
717 
718   PetscFunctionBegin;
719   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
720 
721   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
722   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
723   tmp  = a->solve_work;
724 
725   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
726   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
727 
728   /* forward solve the lower triangular */
729   tmp[0] = b[*r++];
730   for (i=1; i<n; i++) {
731     v   = aa + ai[i] ;
732     vi  = aj + ai[i] ;
733     nz  = a->diag[i] - ai[i];
734     sum = b[*r++];
735     while (nz--) sum -= *v++ * tmp[*vi++ ];
736     tmp[i] = sum;
737   }
738 
739   /* backward solve the upper triangular */
740   for (i=n-1; i>=0; i--){
741     v   = aa + a->diag[i] + 1;
742     vi  = aj + a->diag[i] + 1;
743     nz  = ai[i+1] - a->diag[i] - 1;
744     sum = tmp[i];
745     while (nz--) sum -= *v++ * tmp[*vi++ ];
746     tmp[i] = sum*aa[a->diag[i]];
747     x[*c--] += tmp[i];
748   }
749 
750   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
751   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
752   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
753   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
754   PetscLogFlops(2*a->nz);
755 
756   PetscFunctionReturn(0);
757 }
758 /* -------------------------------------------------------------------*/
759 #undef __FUNCT__
760 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
761 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
762 {
763   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
764   IS             iscol = a->col,isrow = a->row;
765   PetscErrorCode ierr;
766   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
767   PetscInt       nz,*rout,*cout,*diag = a->diag;
768   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
769 
770   PetscFunctionBegin;
771   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
772   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
773   tmp  = a->solve_work;
774 
775   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
776   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
777 
778   /* copy the b into temp work space according to permutation */
779   for (i=0; i<n; i++) tmp[i] = b[c[i]];
780 
781   /* forward solve the U^T */
782   for (i=0; i<n; i++) {
783     v   = aa + diag[i] ;
784     vi  = aj + diag[i] + 1;
785     nz  = ai[i+1] - diag[i] - 1;
786     s1  = tmp[i];
787     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
788     while (nz--) {
789       tmp[*vi++ ] -= (*v++)*s1;
790     }
791     tmp[i] = s1;
792   }
793 
794   /* backward solve the L^T */
795   for (i=n-1; i>=0; i--){
796     v   = aa + diag[i] - 1 ;
797     vi  = aj + diag[i] - 1 ;
798     nz  = diag[i] - ai[i];
799     s1  = tmp[i];
800     while (nz--) {
801       tmp[*vi-- ] -= (*v--)*s1;
802     }
803   }
804 
805   /* copy tmp into x according to permutation */
806   for (i=0; i<n; i++) x[r[i]] = tmp[i];
807 
808   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
809   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
810   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
811   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
812 
813   PetscLogFlops(2*a->nz-A->n);
814   PetscFunctionReturn(0);
815 }
816 
817 #undef __FUNCT__
818 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
819 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
820 {
821   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
822   IS             iscol = a->col,isrow = a->row;
823   PetscErrorCode ierr;
824   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
825   PetscInt       nz,*rout,*cout,*diag = a->diag;
826   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
827 
828   PetscFunctionBegin;
829   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
830 
831   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
832   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
833   tmp = a->solve_work;
834 
835   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
836   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
837 
838   /* copy the b into temp work space according to permutation */
839   for (i=0; i<n; i++) tmp[i] = b[c[i]];
840 
841   /* forward solve the U^T */
842   for (i=0; i<n; i++) {
843     v   = aa + diag[i] ;
844     vi  = aj + diag[i] + 1;
845     nz  = ai[i+1] - diag[i] - 1;
846     tmp[i] *= *v++;
847     while (nz--) {
848       tmp[*vi++ ] -= (*v++)*tmp[i];
849     }
850   }
851 
852   /* backward solve the L^T */
853   for (i=n-1; i>=0; i--){
854     v   = aa + diag[i] - 1 ;
855     vi  = aj + diag[i] - 1 ;
856     nz  = diag[i] - ai[i];
857     while (nz--) {
858       tmp[*vi-- ] -= (*v--)*tmp[i];
859     }
860   }
861 
862   /* copy tmp into x according to permutation */
863   for (i=0; i<n; i++) x[r[i]] += tmp[i];
864 
865   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
866   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
867   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
868   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
869 
870   PetscLogFlops(2*a->nz);
871   PetscFunctionReturn(0);
872 }
873 /* ----------------------------------------------------------------*/
874 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
875 
876 #undef __FUNCT__
877 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
878 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
879 {
880   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
881   IS             isicol;
882   PetscErrorCode ierr;
883   PetscInt       *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
884   PetscInt       *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
885   PetscInt       *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
886   PetscInt       incrlev,nnz,i,levels,diagonal_fill;
887   PetscTruth     col_identity,row_identity;
888   PetscReal      f;
889 
890   PetscFunctionBegin;
891   f             = info->fill;
892   levels        = (PetscInt)info->levels;
893   diagonal_fill = (PetscInt)info->diagonal_fill;
894   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
895 
896   /* special case that simply copies fill pattern */
897   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
898   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
899   if (!levels && row_identity && col_identity) {
900     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
901     (*fact)->factor = FACTOR_LU;
902     b               = (Mat_SeqAIJ*)(*fact)->data;
903     if (!b->diag) {
904       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
905     }
906     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
907     b->row              = isrow;
908     b->col              = iscol;
909     b->icol             = isicol;
910     b->lu_damping       = info->damping;
911     b->lu_zeropivot     = info->zeropivot;
912     b->lu_shift         = info->shift;
913     b->lu_shift_fraction= info->shift_fraction;
914     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
915     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
916     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
917     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
918     PetscFunctionReturn(0);
919   }
920 
921   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
922   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
923 
924   /* get new row pointers */
925   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
926   ainew[0] = 0;
927   /* don't know how many column pointers are needed so estimate */
928   jmax = (PetscInt)(f*(ai[n]+1));
929   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
930   /* ajfill is level of fill for each fill entry */
931   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr);
932   /* fill is a linked list of nonzeros in active row */
933   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
934   /* im is level for each filled value */
935   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
936   /* dloc is location of diagonal in factor */
937   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr);
938   dloc[0]  = 0;
939   for (prow=0; prow<n; prow++) {
940 
941     /* copy current row into linked list */
942     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
943     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
944     xi      = aj + ai[r[prow]] ;
945     fill[n]    = n;
946     fill[prow] = -1; /* marker to indicate if diagonal exists */
947     while (nz--) {
948       fm  = n;
949       idx = ic[*xi++ ];
950       do {
951         m  = fm;
952         fm = fill[m];
953       } while (fm < idx);
954       fill[m]   = idx;
955       fill[idx] = fm;
956       im[idx]   = 0;
957     }
958 
959     /* make sure diagonal entry is included */
960     if (diagonal_fill && fill[prow] == -1) {
961       fm = n;
962       while (fill[fm] < prow) fm = fill[fm];
963       fill[prow] = fill[fm]; /* insert diagonal into linked list */
964       fill[fm]   = prow;
965       im[prow]   = 0;
966       nzf++;
967       dcount++;
968     }
969 
970     nzi = 0;
971     row = fill[n];
972     while (row < prow) {
973       incrlev = im[row] + 1;
974       nz      = dloc[row];
975       xi      = ajnew  + ainew[row]  + nz + 1;
976       flev    = ajfill + ainew[row]  + nz + 1;
977       nnz     = ainew[row+1] - ainew[row] - nz - 1;
978       fm      = row;
979       while (nnz-- > 0) {
980         idx = *xi++ ;
981         if (*flev + incrlev > levels) {
982           flev++;
983           continue;
984         }
985         do {
986           m  = fm;
987           fm = fill[m];
988         } while (fm < idx);
989         if (fm != idx) {
990           im[idx]   = *flev + incrlev;
991           fill[m]   = idx;
992           fill[idx] = fm;
993           fm        = idx;
994           nzf++;
995         } else {
996           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
997         }
998         flev++;
999       }
1000       row = fill[row];
1001       nzi++;
1002     }
1003     /* copy new filled row into permanent storage */
1004     ainew[prow+1] = ainew[prow] + nzf;
1005     if (ainew[prow+1] > jmax) {
1006 
1007       /* estimate how much additional space we will need */
1008       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1009       /* just double the memory each time */
1010       /*  maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1011       PetscInt maxadd = jmax;
1012       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1013       jmax += maxadd;
1014 
1015       /* allocate a longer ajnew and ajfill */
1016       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1017       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1018       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1019       ajnew  = xi;
1020       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1021       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1022       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1023       ajfill = xi;
1024       reallocs++; /* count how many times we realloc */
1025     }
1026     xi          = ajnew + ainew[prow] ;
1027     flev        = ajfill + ainew[prow] ;
1028     dloc[prow]  = nzi;
1029     fm          = fill[n];
1030     while (nzf--) {
1031       *xi++   = fm ;
1032       *flev++ = im[fm];
1033       fm      = fill[fm];
1034     }
1035     /* make sure row has diagonal entry */
1036     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1037       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1038     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1039     }
1040   }
1041   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1042   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1043   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1044   ierr = PetscFree(fill);CHKERRQ(ierr);
1045   ierr = PetscFree(im);CHKERRQ(ierr);
1046 
1047   {
1048     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1049     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1050     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1051     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1052     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1053     if (diagonal_fill) {
1054       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1055     }
1056   }
1057 
1058   /* put together the new matrix */
1059   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1060   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1061   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1062   PetscLogObjectParent(*fact,isicol);
1063   b = (Mat_SeqAIJ*)(*fact)->data;
1064   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1065   b->singlemalloc = PETSC_FALSE;
1066   len = (ainew[n] )*sizeof(PetscScalar);
1067   /* the next line frees the default space generated by the Create() */
1068   ierr = PetscFree(b->a);CHKERRQ(ierr);
1069   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1070   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1071   b->j          = ajnew;
1072   b->i          = ainew;
1073   for (i=0; i<n; i++) dloc[i] += ainew[i];
1074   b->diag       = dloc;
1075   b->ilen       = 0;
1076   b->imax       = 0;
1077   b->row        = isrow;
1078   b->col        = iscol;
1079   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1080   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1081   b->icol       = isicol;
1082   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1083   /* In b structure:  Free imax, ilen, old a, old j.
1084      Allocate dloc, solve_work, new a, new j */
1085   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1086   b->maxnz             = b->nz = ainew[n] ;
1087   b->lu_damping        = info->damping;
1088   b->lu_shift          = info->shift;
1089   b->lu_shift_fraction = info->shift_fraction;
1090   b->lu_zeropivot = info->zeropivot;
1091   (*fact)->factor = FACTOR_LU;
1092   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1093   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1094 
1095   (*fact)->info.factor_mallocs    = reallocs;
1096   (*fact)->info.fill_ratio_given  = f;
1097   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1103 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact)
1104 {
1105   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1106   PetscErrorCode ierr,(*f)(Mat,Mat*);
1107 
1108   PetscFunctionBegin;
1109   if (!a->sbaijMat){
1110     ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
1111   }
1112   ierr = PetscObjectQueryFunction((PetscObject) *fact,"MatCholeskyFactorNumeric",(void (**)(void))&f);CHKERRQ(ierr);
1113   ierr = (*f)(a->sbaijMat,fact);CHKERRQ(ierr);
1114   ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);
1115   a->sbaijMat = PETSC_NULL;
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1121 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1122 {
1123   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1124   PetscErrorCode ierr;
1125   PetscTruth     perm_identity;
1126 
1127   PetscFunctionBegin;
1128   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1129   if (!perm_identity){
1130     SETERRQ(PETSC_ERR_SUP,"Non-identity permutation is not supported yet");
1131   }
1132   if (!a->sbaijMat){
1133     ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
1134   }
1135 
1136   ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject) *fact,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr);
1138   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1139 
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #include "petscbt.h"
1144 #include "src/mat/utils/freespace.h"
1145 #include "src/mat/impls/sbaij/seq/sbaij.h"
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1148 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1149 {
1150   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1151   Mat_SeqSBAIJ   *b;
1152   PetscErrorCode ierr;
1153   PetscTruth     perm_identity;
1154   PetscReal      fill = info->fill;
1155   PetscInt       *rip,i,am=A->m,*ai,*aj,reallocs=0,prow;
1156   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1157   PetscInt       nlnk,*lnk,ncols,*cols,*uj,**uj_ptr;
1158   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1159   PetscBT        lnkbt;
1160 
1161   PetscFunctionBegin;
1162   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1163   if (!perm_identity){
1164     SETERRQ(PETSC_ERR_SUP,"Non-identity permutation is not supported yet");
1165   }
1166 
1167   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1168   ai = a->i; aj = a->j;
1169   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1170 
1171   /* initialization */
1172   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1173   ui[0] = 0;
1174 
1175   /* jl: linked list for storing indices of the pivot rows
1176      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1177   ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1178   il     = jl + am;
1179   uj_ptr = (PetscInt**)(il + am);
1180   for (i=0; i<am; i++){
1181     jl[i] = am; il[i] = 0;
1182   }
1183 
1184   /* create and initialize a linked list for storing column indices of the active row k */
1185   nlnk = am + 1;
1186   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1187 
1188   /* initial FreeSpace size is fill*(ai[am]+1) */
1189   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1190   current_space = free_space;
1191 
1192   for (k=0; k<am; k++){  /* for each active row k */
1193     /* initialize lnk by the column indices of row rip[k] of A */
1194     nzk     = 0;
1195     ncols    = ai[rip[k]+1] - a->diag[rip[k]];
1196     cols = aj + a->diag[rip[k]];
1197     ierr = PetscLLAdd(ncols,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1198     nzk += nlnk;
1199 
1200     /* update lnk by computing fill-in for each pivot row to be merged in */
1201     prow = jl[k]; /* 1st pivot row */
1202 
1203     while (prow < k){
1204       nextprow = jl[prow];
1205 
1206       /* merge prow into k-th row */
1207       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1208       jmax = ui[prow+1];
1209       ncols = jmax-jmin;
1210       cols = uj_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1211       ierr = PetscLLAdd(ncols,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1212       nzk += nlnk;
1213 
1214       /* update il and jl for prow */
1215       if (jmin < jmax){
1216         il[prow] = jmin;
1217         j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1218       }
1219       prow = nextprow;
1220     }
1221 
1222     /* if free space is not available, make more free space */
1223     if (current_space->local_remaining<nzk) {
1224       i = am - k + 1; /* num of unfactored rows */
1225       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1226       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1227       reallocs++;
1228     }
1229 
1230     /* copy data into free space, then initialize lnk */
1231     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1232 
1233     /* add the k-th row into il and jl */
1234     if (nzk-1 > 0){
1235       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1236       jl[k] = jl[i]; jl[i] = k;
1237       il[k] = ui[k] + 1;
1238     }
1239     uj_ptr[k] = current_space->array;
1240     current_space->array           += nzk;
1241     current_space->local_used      += nzk;
1242     current_space->local_remaining -= nzk;
1243 
1244     ui[k+1] = ui[k] + nzk;
1245   }
1246 
1247   if (ai[am] != 0) {
1248     PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1249     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1250     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1251     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1252   } else {
1253      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1254   }
1255 
1256   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1257   ierr = PetscFree(jl);CHKERRQ(ierr);
1258 
1259   /* destroy list of free space and other temporary array(s) */
1260   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1261   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1262   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1263 
1264   /* put together the new matrix in MATSEQSBAIJ format */
1265   ierr = MatCreate(A->comm,am,am,am,am,fact);CHKERRQ(ierr);
1266   ierr = MatSetType(*fact,MATSEQSBAIJ);CHKERRQ(ierr);
1267   ierr = MatSeqSBAIJSetPreallocation(*fact,1,0,PETSC_NULL);CHKERRQ(ierr);
1268 
1269   b = (Mat_SeqSBAIJ*)(*fact)->data;
1270   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1271   b->singlemalloc = PETSC_FALSE;
1272   /* the next line frees the default space generated by the Create() */
1273   ierr = PetscFree(b->a);CHKERRQ(ierr);
1274   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1275   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1276   b->j    = uj;
1277   b->i    = ui;
1278   b->diag = 0;
1279   b->ilen = 0;
1280   b->imax = 0;
1281   b->row  = perm;
1282   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1283   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1284   b->icol = perm;
1285   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1286   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1287   /* In b structure:  Free imax, ilen, old a, old j.
1288      Allocate idnew, solve_work, new a, new j */
1289   PetscLogObjectMemory(*fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1290   b->maxnz = b->nz = ui[am];
1291 
1292   (*fact)->factor                 = FACTOR_CHOLESKY;
1293   (*fact)->info.factor_mallocs    = reallocs;
1294   (*fact)->info.fill_ratio_given  = fill;
1295   if (ai[am] != 0) {
1296     (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1297   } else {
1298     (*fact)->info.fill_ratio_needed = 0.0;
1299   }
1300 
1301   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1302   (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1303   (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1304 
1305   /* -------- end of new ---------------------------*/
1306   ierr = PetscObjectComposeFunction((PetscObject) *fact,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr);
1307   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1308 
1309   PetscFunctionReturn(0);
1310 }
1311