xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 35bd34fa3344fb53ae05f23bf99eddd2f9bb15e6)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*);
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   const PetscInt *c,*r,*ic;
64   PetscInt       i,n = A->rmap->n,*cc,*cr;
65   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
66   PetscInt       *ordcol,*iwk,*iperm,*jw;
67   PetscInt       jmax,lfill,job,*o_i,*o_j;
68   MatScalar      *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
69   PetscReal      af,dt,dtcount,dtcol,fill;
70 
71   PetscFunctionBegin;
72 
73   if (info->dt == PETSC_DEFAULT)      dt      = .005; else dt = info->dt;
74   if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);  else dtcount = info->dtcount;
75   if (info->dtcol == PETSC_DEFAULT)   dtcol   = .01; else dtcol = info->dtcol;
76   if (info->fill == PETSC_DEFAULT)    fill    = ((double)(n*(info->dtcount+1)))/a->nz; else fill = info->fill;
77   lfill   = (PetscInt)(dtcount/2.0);
78   jmax    = (PetscInt)(fill*a->nz);
79 
80 
81   /* ------------------------------------------------------------
82      If reorder=.TRUE., then the original matrix has to be
83      reordered to reflect the user selected ordering scheme, and
84      then de-reordered so it is in it's original format.
85      Because Saad's dperm() is NOT in place, we have to copy
86      the original matrix and allocate more storage. . .
87      ------------------------------------------------------------
88   */
89 
90   /* set reorder to true if either isrow or iscol is not identity */
91   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
92   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
93   reorder = PetscNot(reorder);
94 
95 
96   /* storage for ilu factor */
97   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr);
100   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
101 
102   /* ------------------------------------------------------------
103      Make sure that everything is Fortran formatted (1-Based)
104      ------------------------------------------------------------
105   */
106   for (i=old_i[0];i<old_i[n];i++) {
107     old_j[i]++;
108   }
109   for(i=0;i<n+1;i++) {
110     old_i[i]++;
111   };
112 
113 
114   if (reorder) {
115     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
116     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
117     ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       cr[i]  = r[i]+1;
120       cc[i]  = c[i]+1;
121     }
122     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
123     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
124     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
125     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
126     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr);
127     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job);
128     ierr = PetscFree2(cc,cr);CHKERRQ(ierr);
129     o_a = old_a2;
130     o_j = old_j2;
131     o_i = old_i2;
132   } else {
133     o_a = old_a;
134     o_j = old_j;
135     o_i = old_i;
136   }
137 
138   /* ------------------------------------------------------------
139      Call Saad's ilutp() routine to generate the factorization
140      ------------------------------------------------------------
141   */
142 
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
145   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
146 
147   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148   if (sierr) {
149     switch (sierr) {
150       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax);
151       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax);
152       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
153       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
154       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax);
155       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
156     }
157   }
158 
159   ierr = PetscFree(w);CHKERRQ(ierr);
160   ierr = PetscFree(jw);CHKERRQ(ierr);
161 
162   /* ------------------------------------------------------------
163      Saad's routine gives the result in Modified Sparse Row (msr)
164      Convert to Compressed Sparse Row format (csr)
165      ------------------------------------------------------------
166   */
167 
168   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
169   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
170 
171   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
172 
173   ierr = PetscFree(iwk);CHKERRQ(ierr);
174   ierr = PetscFree(wk);CHKERRQ(ierr);
175 
176   if (reorder) {
177     ierr = PetscFree(old_a2);CHKERRQ(ierr);
178     ierr = PetscFree(old_j2);CHKERRQ(ierr);
179     ierr = PetscFree(old_i2);CHKERRQ(ierr);
180   } else {
181     /* fix permutation of old_j that the factorization introduced */
182     for (i=old_i[0]; i<old_i[n]; i++) {
183       old_j[i-1] = iperm[old_j[i-1]-1];
184     }
185   }
186 
187   /* get rid of the shift to indices starting at 1 */
188   for (i=0; i<n+1; i++) {
189     old_i[i]--;
190   }
191   for (i=old_i[0];i<old_i[n];i++) {
192     old_j[i]--;
193   }
194 
195   /* Make the factored matrix 0-based */
196   for (i=0; i<n+1; i++) {
197     new_i[i]--;
198   }
199   for (i=new_i[0];i<new_i[n];i++) {
200     new_j[i]--;
201   }
202 
203   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204   /*-- permute the right-hand-side and solution vectors           --*/
205   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
206   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
207   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
208   for(i=0; i<n; i++) {
209     ordcol[i] = ic[iperm[i]-1];
210   };
211   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
212   ierr = ISDestroy(isicol);CHKERRQ(ierr);
213 
214   ierr = PetscFree(iperm);CHKERRQ(ierr);
215 
216   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
217   ierr = PetscFree(ordcol);CHKERRQ(ierr);
218 
219   /*----- put together the new matrix -----*/
220 
221   ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr);
222   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
223   ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
224   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
225   (*fact)->factor    = MAT_FACTOR_LU;
226   (*fact)->assembled = PETSC_TRUE;
227 
228   b = (Mat_SeqAIJ*)(*fact)->data;
229   b->free_a        = PETSC_TRUE;
230   b->free_ij       = PETSC_TRUE;
231   b->singlemalloc  = PETSC_FALSE;
232   b->a             = new_a;
233   b->j             = new_j;
234   b->i             = new_i;
235   b->ilen          = 0;
236   b->imax          = 0;
237   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
238   b->row           = isirow;
239   b->col           = iscolf;
240   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
241   b->maxnz = b->nz = new_i[n];
242   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
243   (*fact)->info.factor_mallocs = 0;
244 
245   af = ((double)b->nz)/((double)a->nz) + .001;
246   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr);
247   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
248   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
249   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
250 
251   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
252 
253   PetscFunctionReturn(0);
254 #endif
255 }
256 
257 EXTERN_C_BEGIN
258 #undef __FUNCT__
259 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
260 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
261 {
262   PetscFunctionBegin;
263   *flg = PETSC_TRUE;
264   PetscFunctionReturn(0);
265 }
266 EXTERN_C_END
267 
268 EXTERN_C_BEGIN
269 #undef __FUNCT__
270 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
271 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
272 {
273   PetscInt           n = A->rmap->n;
274   PetscErrorCode     ierr;
275 
276   PetscFunctionBegin;
277   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
278   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
279   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
280     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
281     if (ftype == MAT_FACTOR_ILU) {
282       (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ;
283     } else {
284       (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
285     }
286   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
287     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
288     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
289     if (ftype == MAT_FACTOR_ICC) {
290       (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
291     } else {
292       (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
293     }
294   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
295   (*B)->factor = ftype;
296   PetscFunctionReturn(0);
297 }
298 EXTERN_C_END
299 
300 #undef __FUNCT__
301 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
302 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
303 {
304   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
305   IS                 isicol;
306   PetscErrorCode     ierr;
307   const PetscInt     *r,*ic;
308   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
309   PetscInt           *bi,*bj,*ajtmp;
310   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
311   PetscReal          f;
312   PetscInt           nlnk,*lnk,k,**bi_ptr;
313   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
314   PetscBT            lnkbt;
315 
316   PetscFunctionBegin;
317   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
318   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
319   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
320   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
321 
322   /* get new row pointers */
323   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
324   bi[0] = 0;
325 
326   /* bdiag is location of diagonal in factor */
327   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
328   bdiag[0] = 0;
329 
330   /* linked list for storing column indices of the active row */
331   nlnk = n + 1;
332   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
333 
334   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
335 
336   /* initial FreeSpace size is f*(ai[n]+1) */
337   f = info->fill;
338   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
339   current_space = free_space;
340 
341   for (i=0; i<n; i++) {
342     /* copy previous fill into linked list */
343     nzi = 0;
344     nnz = ai[r[i]+1] - ai[r[i]];
345     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
346     ajtmp = aj + ai[r[i]];
347     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
348     nzi += nlnk;
349 
350     /* add pivot rows into linked list */
351     row = lnk[n];
352     while (row < i) {
353       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
354       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
355       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
356       nzi += nlnk;
357       row  = lnk[row];
358     }
359     bi[i+1] = bi[i] + nzi;
360     im[i]   = nzi;
361 
362     /* mark bdiag */
363     nzbd = 0;
364     nnz  = nzi;
365     k    = lnk[n];
366     while (nnz-- && k < i){
367       nzbd++;
368       k = lnk[k];
369     }
370     bdiag[i] = bi[i] + nzbd;
371 
372     /* if free space is not available, make more free space */
373     if (current_space->local_remaining<nzi) {
374       nnz = (n - i)*nzi; /* estimated and max additional space needed */
375       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
376       reallocs++;
377     }
378 
379     /* copy data into free space, then initialize lnk */
380     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
381     bi_ptr[i] = current_space->array;
382     current_space->array           += nzi;
383     current_space->local_used      += nzi;
384     current_space->local_remaining -= nzi;
385   }
386 #if defined(PETSC_USE_INFO)
387   if (ai[n] != 0) {
388     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
389     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
390     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
391     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
392     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
393   } else {
394     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
395   }
396 #endif
397 
398   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
399   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
400 
401   /* destroy list of free space and other temporary array(s) */
402   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
403   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
404   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
405   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
406 
407   /* put together the new matrix */
408   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
409   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
410   b    = (Mat_SeqAIJ*)(B)->data;
411   b->free_a       = PETSC_TRUE;
412   b->free_ij      = PETSC_TRUE;
413   b->singlemalloc = PETSC_FALSE;
414   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
415   b->j          = bj;
416   b->i          = bi;
417   b->diag       = bdiag;
418   b->ilen       = 0;
419   b->imax       = 0;
420   b->row        = isrow;
421   b->col        = iscol;
422   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
423   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
424   b->icol       = isicol;
425   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
426 
427   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
428   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
429   b->maxnz = b->nz = bi[n] ;
430 
431   (B)->factor                 =  MAT_FACTOR_LU;
432   (B)->info.factor_mallocs    = reallocs;
433   (B)->info.fill_ratio_given  = f;
434 
435   if (ai[n] != 0) {
436     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
437   } else {
438     (B)->info.fill_ratio_needed = 0.0;
439   }
440   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
441   (B)->ops->solve            = MatSolve_SeqAIJ;
442   (B)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ;
443   /* switch to inodes if appropriate */
444   ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 /*
449     Trouble in factorization, should we dump the original matrix?
450 */
451 #undef __FUNCT__
452 #define __FUNCT__ "MatFactorDumpMatrix"
453 PetscErrorCode MatFactorDumpMatrix(Mat A)
454 {
455   PetscErrorCode ierr;
456   PetscTruth     flg;
457 
458   PetscFunctionBegin;
459   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr);
460   if (flg) {
461     PetscViewer viewer;
462     char        filename[PETSC_MAX_PATH_LEN];
463 
464     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
465     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
466     ierr = MatView(A,viewer);CHKERRQ(ierr);
467     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
468   }
469   PetscFunctionReturn(0);
470 }
471 
472 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec);
473 
474 /* ----------------------------------------------------------- */
475 #undef __FUNCT__
476 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
477 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
478 {
479   Mat            C=B;
480   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
481   IS             isrow = b->row,isicol = b->icol;
482   PetscErrorCode ierr;
483   const PetscInt  *r,*ic,*ics;
484   PetscInt       i,j,n=A->rmap->n,*bi=b->i,*bj=b->j;
485   PetscInt       *ajtmp,*bjtmp,nz,row;
486   PetscInt       *diag_offset = b->diag,diag,*pj;
487   PetscScalar    *rtmp,*pc,multiplier,*rtmps;
488   MatScalar      *v,*pv;
489   PetscScalar    d;
490   PetscReal      rs;
491   LUShift_Ctx    sctx;
492   PetscInt       newshift,*ddiag;
493 
494   PetscFunctionBegin;
495   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
496   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
497   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
498   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
499   rtmps = rtmp; ics = ic;
500 
501   sctx.shift_top      = 0;
502   sctx.nshift_max     = 0;
503   sctx.shift_lo       = 0;
504   sctx.shift_hi       = 0;
505   sctx.shift_fraction = 0;
506 
507   /* if both shift schemes are chosen by user, only use info->shiftpd */
508   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
509     PetscInt *aai = a->i;
510     ddiag          = a->diag;
511     sctx.shift_top = 0;
512     for (i=0; i<n; i++) {
513       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
514       d  = (a->a)[ddiag[i]];
515       rs = -PetscAbsScalar(d) - PetscRealPart(d);
516       v  = a->a+aai[i];
517       nz = aai[i+1] - aai[i];
518       for (j=0; j<nz; j++)
519 	rs += PetscAbsScalar(v[j]);
520       if (rs>sctx.shift_top) sctx.shift_top = rs;
521     }
522     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
523     sctx.shift_top    *= 1.1;
524     sctx.nshift_max   = 5;
525     sctx.shift_lo     = 0.;
526     sctx.shift_hi     = 1.;
527   }
528 
529   sctx.shift_amount = 0;
530   sctx.nshift       = 0;
531   do {
532     sctx.lushift = PETSC_FALSE;
533     for (i=0; i<n; i++){
534       nz    = bi[i+1] - bi[i];
535       bjtmp = bj + bi[i];
536       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
537 
538       /* load in initial (unfactored row) */
539       nz    = a->i[r[i]+1] - a->i[r[i]];
540       ajtmp = a->j + a->i[r[i]];
541       v     = a->a + a->i[r[i]];
542       for (j=0; j<nz; j++) {
543         rtmp[ics[ajtmp[j]]] = v[j];
544       }
545       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
546 
547       row = *bjtmp++;
548       while  (row < i) {
549         pc = rtmp + row;
550         if (*pc != 0.0) {
551           pv         = b->a + diag_offset[row];
552           pj         = b->j + diag_offset[row] + 1;
553           multiplier = *pc / *pv++;
554           *pc        = multiplier;
555           nz         = bi[row+1] - diag_offset[row] - 1;
556           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
557           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
558         }
559         row = *bjtmp++;
560       }
561       /* finished row so stick it into b->a */
562       pv   = b->a + bi[i] ;
563       pj   = b->j + bi[i] ;
564       nz   = bi[i+1] - bi[i];
565       diag = diag_offset[i] - bi[i];
566       rs   = 0.0;
567       for (j=0; j<nz; j++) {
568         pv[j] = rtmps[pj[j]];
569         if (j != diag) rs += PetscAbsScalar(pv[j]);
570       }
571 
572       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
573       sctx.rs  = rs;
574       sctx.pv  = pv[diag];
575       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
576       if (newshift == 1) break;
577     }
578 
579     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
580       /*
581        * if no shift in this attempt & shifting & started shifting & can refine,
582        * then try lower shift
583        */
584       sctx.shift_hi        = sctx.shift_fraction;
585       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
586       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
587       sctx.lushift         = PETSC_TRUE;
588       sctx.nshift++;
589     }
590   } while (sctx.lushift);
591 
592   /* invert diagonal entries for simplier triangular solves */
593   for (i=0; i<n; i++) {
594     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
595   }
596   ierr = PetscFree(rtmp);CHKERRQ(ierr);
597   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
598   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
599   if (b->inode.use) {
600     C->ops->solve   = MatSolve_Inode;
601   } else {
602     PetscTruth row_identity, col_identity;
603     ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
604     ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
605     if (row_identity && col_identity) {
606       C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
607     } else {
608       C->ops->solve   = MatSolve_SeqAIJ;
609     }
610   }
611   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
612   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
613   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
614   C->ops->matsolve           = MatMatSolve_SeqAIJ;
615   C->assembled    = PETSC_TRUE;
616   C->preallocated = PETSC_TRUE;
617   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
618   if (sctx.nshift){
619      if (info->shiftpd) {
620       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
621     } else if (info->shiftnz) {
622       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 /*
629    This routine implements inplace ILU(0) with row or/and column permutations.
630    Input:
631      A - original matrix
632    Output;
633      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
634          a->j (col index) is permuted by the inverse of colperm, then sorted
635          a->a reordered accordingly with a->j
636          a->diag (ptr to diagonal elements) is updated.
637 */
638 #undef __FUNCT__
639 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
640 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
641 {
642   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
643   IS             isrow = a->row,isicol = a->icol;
644   PetscErrorCode ierr;
645   const PetscInt *r,*ic,*ics;
646   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
647   PetscInt       *ajtmp,nz,row;
648   PetscInt       *diag = a->diag,nbdiag,*pj;
649   PetscScalar    *rtmp,*pc,multiplier,d;
650   MatScalar      *v,*pv;
651   PetscReal      rs;
652   LUShift_Ctx    sctx;
653   PetscInt       newshift;
654 
655   PetscFunctionBegin;
656   if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
657   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
658   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
659   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
660   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
661   ics = ic;
662 
663   sctx.shift_top      = 0;
664   sctx.nshift_max     = 0;
665   sctx.shift_lo       = 0;
666   sctx.shift_hi       = 0;
667   sctx.shift_fraction = 0;
668 
669   /* if both shift schemes are chosen by user, only use info->shiftpd */
670   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
671     sctx.shift_top = 0;
672     for (i=0; i<n; i++) {
673       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
674       d  = (a->a)[diag[i]];
675       rs = -PetscAbsScalar(d) - PetscRealPart(d);
676       v  = a->a+ai[i];
677       nz = ai[i+1] - ai[i];
678       for (j=0; j<nz; j++)
679 	rs += PetscAbsScalar(v[j]);
680       if (rs>sctx.shift_top) sctx.shift_top = rs;
681     }
682     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
683     sctx.shift_top    *= 1.1;
684     sctx.nshift_max   = 5;
685     sctx.shift_lo     = 0.;
686     sctx.shift_hi     = 1.;
687   }
688 
689   sctx.shift_amount = 0;
690   sctx.nshift       = 0;
691   do {
692     sctx.lushift = PETSC_FALSE;
693     for (i=0; i<n; i++){
694       /* load in initial unfactored row */
695       nz    = ai[r[i]+1] - ai[r[i]];
696       ajtmp = aj + ai[r[i]];
697       v     = a->a + ai[r[i]];
698       /* sort permuted ajtmp and values v accordingly */
699       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
700       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
701 
702       diag[r[i]] = ai[r[i]];
703       for (j=0; j<nz; j++) {
704         rtmp[ajtmp[j]] = v[j];
705         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
706       }
707       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
708 
709       row = *ajtmp++;
710       while  (row < i) {
711         pc = rtmp + row;
712         if (*pc != 0.0) {
713           pv         = a->a + diag[r[row]];
714           pj         = aj + diag[r[row]] + 1;
715 
716           multiplier = *pc / *pv++;
717           *pc        = multiplier;
718           nz         = ai[r[row]+1] - diag[r[row]] - 1;
719           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
720           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
721         }
722         row = *ajtmp++;
723       }
724       /* finished row so overwrite it onto a->a */
725       pv   = a->a + ai[r[i]] ;
726       pj   = aj + ai[r[i]] ;
727       nz   = ai[r[i]+1] - ai[r[i]];
728       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
729 
730       rs   = 0.0;
731       for (j=0; j<nz; j++) {
732         pv[j] = rtmp[pj[j]];
733         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
734       }
735 
736       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
737       sctx.rs  = rs;
738       sctx.pv  = pv[nbdiag];
739       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
740       if (newshift == 1) break;
741     }
742 
743     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
744       /*
745        * if no shift in this attempt & shifting & started shifting & can refine,
746        * then try lower shift
747        */
748       sctx.shift_hi        = sctx.shift_fraction;
749       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
750       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
751       sctx.lushift         = PETSC_TRUE;
752       sctx.nshift++;
753     }
754   } while (sctx.lushift);
755 
756   /* invert diagonal entries for simplier triangular solves */
757   for (i=0; i<n; i++) {
758     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
759   }
760 
761   ierr = PetscFree(rtmp);CHKERRQ(ierr);
762   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
763   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
764   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
765   A->ops->solveadd          = MatSolveAdd_SeqAIJ;
766   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
767   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
768   A->assembled = PETSC_TRUE;
769   A->preallocated = PETSC_TRUE;
770   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
771   if (sctx.nshift){
772     if (info->shiftpd) {
773       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
774     } else if (info->shiftnz) {
775       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
776     }
777   }
778   PetscFunctionReturn(0);
779 }
780 
781 /* ----------------------------------------------------------- */
782 #undef __FUNCT__
783 #define __FUNCT__ "MatLUFactor_SeqAIJ"
784 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
785 {
786   PetscErrorCode ierr;
787   Mat            C;
788 
789   PetscFunctionBegin;
790   ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
791   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
792   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
793   A->ops->solve            = C->ops->solve;
794   A->ops->solvetranspose   = C->ops->solvetranspose;
795   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
796   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
797   PetscFunctionReturn(0);
798 }
799 /* ----------------------------------------------------------- */
800 #undef __FUNCT__
801 #define __FUNCT__ "MatSolve_SeqAIJ"
802 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
803 {
804   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
805   IS                iscol = a->col,isrow = a->row;
806   PetscErrorCode    ierr;
807   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
808   PetscInt          nz;
809   const PetscInt    *rout,*cout,*r,*c;
810   PetscScalar       *x,*tmp,*tmps,sum;
811   const PetscScalar *b;
812   const MatScalar   *aa = a->a,*v;
813 
814   PetscFunctionBegin;
815   if (!n) PetscFunctionReturn(0);
816 
817   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
818   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
819   tmp  = a->solve_work;
820 
821   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
822   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
823 
824   /* forward solve the lower triangular */
825   tmp[0] = b[*r++];
826   tmps   = tmp;
827   for (i=1; i<n; i++) {
828     v   = aa + ai[i] ;
829     vi  = aj + ai[i] ;
830     nz  = a->diag[i] - ai[i];
831     sum = b[*r++];
832     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
833     tmp[i] = sum;
834   }
835 
836   /* backward solve the upper triangular */
837   for (i=n-1; i>=0; i--){
838     v   = aa + a->diag[i] + 1;
839     vi  = aj + a->diag[i] + 1;
840     nz  = ai[i+1] - a->diag[i] - 1;
841     sum = tmp[i];
842     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
843     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
844   }
845 
846   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
847   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
848   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
849   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
850   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
851   PetscFunctionReturn(0);
852 }
853 
854 #undef __FUNCT__
855 #define __FUNCT__ "MatMatSolve_SeqAIJ"
856 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
857 {
858   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
859   IS              iscol = a->col,isrow = a->row;
860   PetscErrorCode  ierr;
861   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
862   PetscInt        nz,neq;
863   const PetscInt  *rout,*cout,*r,*c;
864   PetscScalar     *x,*b,*tmp,*tmps,sum;
865   const MatScalar *aa = a->a,*v;
866   PetscTruth      bisdense,xisdense;
867 
868   PetscFunctionBegin;
869   if (!n) PetscFunctionReturn(0);
870 
871   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
872   if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
873   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
874   if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
875 
876   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
877   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
878 
879   tmp  = a->solve_work;
880   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
881   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
882 
883   for (neq=0; neq<B->cmap->n; neq++){
884     /* forward solve the lower triangular */
885     tmp[0] = b[r[0]];
886     tmps   = tmp;
887     for (i=1; i<n; i++) {
888       v   = aa + ai[i] ;
889       vi  = aj + ai[i] ;
890       nz  = a->diag[i] - ai[i];
891       sum = b[r[i]];
892       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
893       tmp[i] = sum;
894     }
895     /* backward solve the upper triangular */
896     for (i=n-1; i>=0; i--){
897       v   = aa + a->diag[i] + 1;
898       vi  = aj + a->diag[i] + 1;
899       nz  = ai[i+1] - a->diag[i] - 1;
900       sum = tmp[i];
901       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
902       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
903     }
904 
905     b += n;
906     x += n;
907   }
908   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
909   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
910   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
911   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
912   ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr);
913   PetscFunctionReturn(0);
914 }
915 
916 #undef __FUNCT__
917 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
918 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
919 {
920   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
921   IS              iscol = a->col,isrow = a->row;
922   PetscErrorCode  ierr;
923   const PetscInt  *r,*c,*rout,*cout;
924   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
925   PetscInt        nz,row;
926   PetscScalar     *x,*b,*tmp,*tmps,sum;
927   const MatScalar *aa = a->a,*v;
928 
929   PetscFunctionBegin;
930   if (!n) PetscFunctionReturn(0);
931 
932   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
933   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
934   tmp  = a->solve_work;
935 
936   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
937   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
938 
939   /* forward solve the lower triangular */
940   tmp[0] = b[*r++];
941   tmps   = tmp;
942   for (row=1; row<n; row++) {
943     i   = rout[row]; /* permuted row */
944     v   = aa + ai[i] ;
945     vi  = aj + ai[i] ;
946     nz  = a->diag[i] - ai[i];
947     sum = b[*r++];
948     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
949     tmp[row] = sum;
950   }
951 
952   /* backward solve the upper triangular */
953   for (row=n-1; row>=0; row--){
954     i   = rout[row]; /* permuted row */
955     v   = aa + a->diag[i] + 1;
956     vi  = aj + a->diag[i] + 1;
957     nz  = ai[i+1] - a->diag[i] - 1;
958     sum = tmp[row];
959     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
960     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
961   }
962 
963   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
964   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
965   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
966   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
967   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 /* ----------------------------------------------------------- */
972 #undef __FUNCT__
973 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
974 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
975 {
976   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
977   PetscErrorCode    ierr;
978   PetscInt          n = A->rmap->n;
979   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
980   PetscScalar       *x;
981   const PetscScalar *b;
982   const MatScalar   *aa = a->a;
983 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
984   PetscInt          adiag_i,i,nz,ai_i;
985   const MatScalar   *v;
986   PetscScalar       sum;
987 #endif
988 
989   PetscFunctionBegin;
990   if (!n) PetscFunctionReturn(0);
991 
992   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
993   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
994 
995 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
996   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
997 #else
998   /* forward solve the lower triangular */
999   x[0] = b[0];
1000   for (i=1; i<n; i++) {
1001     ai_i = ai[i];
1002     v    = aa + ai_i;
1003     vi   = aj + ai_i;
1004     nz   = adiag[i] - ai_i;
1005     sum  = b[i];
1006     while (nz--) sum -= *v++ * x[*vi++];
1007     x[i] = sum;
1008   }
1009 
1010   /* backward solve the upper triangular */
1011   for (i=n-1; i>=0; i--){
1012     adiag_i = adiag[i];
1013     v       = aa + adiag_i + 1;
1014     vi      = aj + adiag_i + 1;
1015     nz      = ai[i+1] - adiag_i - 1;
1016     sum     = x[i];
1017     while (nz--) sum -= *v++ * x[*vi++];
1018     x[i]    = sum*aa[adiag_i];
1019   }
1020 #endif
1021   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
1022   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1023   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1029 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1030 {
1031   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1032   IS              iscol = a->col,isrow = a->row;
1033   PetscErrorCode  ierr;
1034   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1035   PetscInt        nz;
1036   const PetscInt  *rout,*cout,*r,*c;
1037   PetscScalar     *x,*b,*tmp,sum;
1038   const MatScalar *aa = a->a,*v;
1039 
1040   PetscFunctionBegin;
1041   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1042 
1043   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1044   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1045   tmp  = a->solve_work;
1046 
1047   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1048   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1049 
1050   /* forward solve the lower triangular */
1051   tmp[0] = b[*r++];
1052   for (i=1; i<n; i++) {
1053     v   = aa + ai[i] ;
1054     vi  = aj + ai[i] ;
1055     nz  = a->diag[i] - ai[i];
1056     sum = b[*r++];
1057     while (nz--) sum -= *v++ * tmp[*vi++ ];
1058     tmp[i] = sum;
1059   }
1060 
1061   /* backward solve the upper triangular */
1062   for (i=n-1; i>=0; i--){
1063     v   = aa + a->diag[i] + 1;
1064     vi  = aj + a->diag[i] + 1;
1065     nz  = ai[i+1] - a->diag[i] - 1;
1066     sum = tmp[i];
1067     while (nz--) sum -= *v++ * tmp[*vi++ ];
1068     tmp[i] = sum*aa[a->diag[i]];
1069     x[*c--] += tmp[i];
1070   }
1071 
1072   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1073   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1074   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1075   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1076   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1077 
1078   PetscFunctionReturn(0);
1079 }
1080 /* -------------------------------------------------------------------*/
1081 #undef __FUNCT__
1082 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1083 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1084 {
1085   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1086   IS              iscol = a->col,isrow = a->row;
1087   PetscErrorCode  ierr;
1088   const PetscInt  *rout,*cout,*r,*c;
1089   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1090   PetscInt        nz,*diag = a->diag;
1091   PetscScalar     *x,*b,*tmp,s1;
1092   const MatScalar *aa = a->a,*v;
1093 
1094   PetscFunctionBegin;
1095   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1096   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1097   tmp  = a->solve_work;
1098 
1099   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1100   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1101 
1102   /* copy the b into temp work space according to permutation */
1103   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1104 
1105   /* forward solve the U^T */
1106   for (i=0; i<n; i++) {
1107     v   = aa + diag[i] ;
1108     vi  = aj + diag[i] + 1;
1109     nz  = ai[i+1] - diag[i] - 1;
1110     s1  = tmp[i];
1111     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1112     while (nz--) {
1113       tmp[*vi++ ] -= (*v++)*s1;
1114     }
1115     tmp[i] = s1;
1116   }
1117 
1118   /* backward solve the L^T */
1119   for (i=n-1; i>=0; i--){
1120     v   = aa + diag[i] - 1 ;
1121     vi  = aj + diag[i] - 1 ;
1122     nz  = diag[i] - ai[i];
1123     s1  = tmp[i];
1124     while (nz--) {
1125       tmp[*vi-- ] -= (*v--)*s1;
1126     }
1127   }
1128 
1129   /* copy tmp into x according to permutation */
1130   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1131 
1132   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1133   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1134   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1135   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1136 
1137   ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1143 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1144 {
1145   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1146   IS              iscol = a->col,isrow = a->row;
1147   PetscErrorCode  ierr;
1148   const PetscInt  *r,*c,*rout,*cout;
1149   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1150   PetscInt        nz,*diag = a->diag;
1151   PetscScalar     *x,*b,*tmp;
1152   const MatScalar *aa = a->a,*v;
1153 
1154   PetscFunctionBegin;
1155   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1156 
1157   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1158   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1159   tmp = a->solve_work;
1160 
1161   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1162   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1163 
1164   /* copy the b into temp work space according to permutation */
1165   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1166 
1167   /* forward solve the U^T */
1168   for (i=0; i<n; i++) {
1169     v   = aa + diag[i] ;
1170     vi  = aj + diag[i] + 1;
1171     nz  = ai[i+1] - diag[i] - 1;
1172     tmp[i] *= *v++;
1173     while (nz--) {
1174       tmp[*vi++ ] -= (*v++)*tmp[i];
1175     }
1176   }
1177 
1178   /* backward solve the L^T */
1179   for (i=n-1; i>=0; i--){
1180     v   = aa + diag[i] - 1 ;
1181     vi  = aj + diag[i] - 1 ;
1182     nz  = diag[i] - ai[i];
1183     while (nz--) {
1184       tmp[*vi-- ] -= (*v--)*tmp[i];
1185     }
1186   }
1187 
1188   /* copy tmp into x according to permutation */
1189   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1190 
1191   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1192   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1193   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1194   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1195 
1196   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1197   PetscFunctionReturn(0);
1198 }
1199 /* ----------------------------------------------------------------*/
1200 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1201 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption);
1202 
1203 #undef __FUNCT__
1204 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1205 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1206 {
1207   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1208   IS                 isicol;
1209   PetscErrorCode     ierr;
1210   const PetscInt     *r,*ic;
1211   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1212   PetscInt           *bi,*cols,nnz,*cols_lvl;
1213   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1214   PetscInt           i,levels,diagonal_fill;
1215   PetscTruth         col_identity,row_identity;
1216   PetscReal          f;
1217   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1218   PetscBT            lnkbt;
1219   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1220   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1221   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1222   PetscTruth         missing;
1223 
1224   PetscFunctionBegin;
1225   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1226   f             = info->fill;
1227   levels        = (PetscInt)info->levels;
1228   diagonal_fill = (PetscInt)info->diagonal_fill;
1229   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1230 
1231   /* special case that simply copies fill pattern */
1232   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1233   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1234   if (!levels && row_identity && col_identity) {
1235     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr);
1236     fact->factor = MAT_FACTOR_ILU;
1237     (fact)->info.factor_mallocs    = 0;
1238     (fact)->info.fill_ratio_given  = info->fill;
1239     (fact)->info.fill_ratio_needed = 1.0;
1240     b               = (Mat_SeqAIJ*)(fact)->data;
1241     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1242     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1243     b->row              = isrow;
1244     b->col              = iscol;
1245     b->icol             = isicol;
1246     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1247     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1248     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1249     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1250     ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1251     PetscFunctionReturn(0);
1252   }
1253 
1254   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1255   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1256 
1257   /* get new row pointers */
1258   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1259   bi[0] = 0;
1260   /* bdiag is location of diagonal in factor */
1261   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1262   bdiag[0]  = 0;
1263 
1264   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1265   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1266 
1267   /* create a linked list for storing column indices of the active row */
1268   nlnk = n + 1;
1269   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1270 
1271   /* initial FreeSpace size is f*(ai[n]+1) */
1272   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1273   current_space = free_space;
1274   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1275   current_space_lvl = free_space_lvl;
1276 
1277   for (i=0; i<n; i++) {
1278     nzi = 0;
1279     /* copy current row into linked list */
1280     nnz  = ai[r[i]+1] - ai[r[i]];
1281     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1282     cols = aj + ai[r[i]];
1283     lnk[i] = -1; /* marker to indicate if diagonal exists */
1284     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1285     nzi += nlnk;
1286 
1287     /* make sure diagonal entry is included */
1288     if (diagonal_fill && lnk[i] == -1) {
1289       fm = n;
1290       while (lnk[fm] < i) fm = lnk[fm];
1291       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1292       lnk[fm]    = i;
1293       lnk_lvl[i] = 0;
1294       nzi++; dcount++;
1295     }
1296 
1297     /* add pivot rows into the active row */
1298     nzbd = 0;
1299     prow = lnk[n];
1300     while (prow < i) {
1301       nnz      = bdiag[prow];
1302       cols     = bj_ptr[prow] + nnz + 1;
1303       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1304       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1305       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1306       nzi += nlnk;
1307       prow = lnk[prow];
1308       nzbd++;
1309     }
1310     bdiag[i] = nzbd;
1311     bi[i+1]  = bi[i] + nzi;
1312 
1313     /* if free space is not available, make more free space */
1314     if (current_space->local_remaining<nzi) {
1315       nnz = nzi*(n - i); /* estimated and max additional space needed */
1316       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1317       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1318       reallocs++;
1319     }
1320 
1321     /* copy data into free_space and free_space_lvl, then initialize lnk */
1322     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1323     bj_ptr[i]    = current_space->array;
1324     bjlvl_ptr[i] = current_space_lvl->array;
1325 
1326     /* make sure the active row i has diagonal entry */
1327     if (*(bj_ptr[i]+bdiag[i]) != i) {
1328       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1329     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1330     }
1331 
1332     current_space->array           += nzi;
1333     current_space->local_used      += nzi;
1334     current_space->local_remaining -= nzi;
1335     current_space_lvl->array           += nzi;
1336     current_space_lvl->local_used      += nzi;
1337     current_space_lvl->local_remaining -= nzi;
1338   }
1339 
1340   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1341   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1342 
1343   /* destroy list of free space and other temporary arrays */
1344   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1345   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1346   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1347   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1348   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1349 
1350 #if defined(PETSC_USE_INFO)
1351   {
1352     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1353     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1354     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1355     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1356     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1357     if (diagonal_fill) {
1358       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1359     }
1360   }
1361 #endif
1362 
1363   /* put together the new matrix */
1364   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1365   b = (Mat_SeqAIJ*)(fact)->data;
1366   b->free_a       = PETSC_TRUE;
1367   b->free_ij      = PETSC_TRUE;
1368   b->singlemalloc = PETSC_FALSE;
1369   len = (bi[n] )*sizeof(PetscScalar);
1370   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1371   b->j          = bj;
1372   b->i          = bi;
1373   for (i=0; i<n; i++) bdiag[i] += bi[i];
1374   b->diag       = bdiag;
1375   b->ilen       = 0;
1376   b->imax       = 0;
1377   b->row        = isrow;
1378   b->col        = iscol;
1379   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1380   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1381   b->icol       = isicol;
1382   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1383   /* In b structure:  Free imax, ilen, old a, old j.
1384      Allocate bdiag, solve_work, new a, new j */
1385   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1386   b->maxnz             = b->nz = bi[n] ;
1387   (fact)->info.factor_mallocs    = reallocs;
1388   (fact)->info.fill_ratio_given  = f;
1389   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1390   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1391   ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 #include "src/mat/impls/sbaij/seq/sbaij.h"
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1398 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
1399 {
1400   Mat            C = B;
1401   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1402   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1403   IS             ip=b->row,iip = b->icol;
1404   PetscErrorCode ierr;
1405   const PetscInt *rip,*riip;
1406   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
1407   PetscInt       *ai=a->i,*aj=a->j;
1408   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1409   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1410   PetscReal      zeropivot,rs,shiftnz;
1411   PetscReal      shiftpd;
1412   ChShift_Ctx    sctx;
1413   PetscInt       newshift;
1414   PetscTruth     perm_identity;
1415 
1416   PetscFunctionBegin;
1417 
1418   shiftnz   = info->shiftnz;
1419   shiftpd   = info->shiftpd;
1420   zeropivot = info->zeropivot;
1421 
1422   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1423   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1424 
1425   /* initialization */
1426   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1427   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1428   jl   = il + mbs;
1429   rtmp = (MatScalar*)(jl + mbs);
1430 
1431   sctx.shift_amount = 0;
1432   sctx.nshift       = 0;
1433   do {
1434     sctx.chshift = PETSC_FALSE;
1435     for (i=0; i<mbs; i++) {
1436       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1437     }
1438 
1439     for (k = 0; k<mbs; k++){
1440       bval = ba + bi[k];
1441       /* initialize k-th row by the perm[k]-th row of A */
1442       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1443       for (j = jmin; j < jmax; j++){
1444         col = riip[aj[j]];
1445         if (col >= k){ /* only take upper triangular entry */
1446           rtmp[col] = aa[j];
1447           *bval++  = 0.0; /* for in-place factorization */
1448         }
1449       }
1450       /* shift the diagonal of the matrix */
1451       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1452 
1453       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1454       dk = rtmp[k];
1455       i = jl[k]; /* first row to be added to k_th row  */
1456 
1457       while (i < k){
1458         nexti = jl[i]; /* next row to be added to k_th row */
1459 
1460         /* compute multiplier, update diag(k) and U(i,k) */
1461         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1462         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1463         dk += uikdi*ba[ili];
1464         ba[ili] = uikdi; /* -U(i,k) */
1465 
1466         /* add multiple of row i to k-th row */
1467         jmin = ili + 1; jmax = bi[i+1];
1468         if (jmin < jmax){
1469           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1470           /* update il and jl for row i */
1471           il[i] = jmin;
1472           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1473         }
1474         i = nexti;
1475       }
1476 
1477       /* shift the diagonals when zero pivot is detected */
1478       /* compute rs=sum of abs(off-diagonal) */
1479       rs   = 0.0;
1480       jmin = bi[k]+1;
1481       nz   = bi[k+1] - jmin;
1482       bcol = bj + jmin;
1483       while (nz--){
1484         rs += PetscAbsScalar(rtmp[*bcol]);
1485         bcol++;
1486       }
1487 
1488       sctx.rs = rs;
1489       sctx.pv = dk;
1490       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1491 
1492       if (newshift == 1) {
1493         if (!sctx.shift_amount) {
1494           sctx.shift_amount = 1e-5;
1495         }
1496         break;
1497       }
1498 
1499       /* copy data into U(k,:) */
1500       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1501       jmin = bi[k]+1; jmax = bi[k+1];
1502       if (jmin < jmax) {
1503         for (j=jmin; j<jmax; j++){
1504           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1505         }
1506         /* add the k-th row into il and jl */
1507         il[k] = jmin;
1508         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1509       }
1510     }
1511   } while (sctx.chshift);
1512   ierr = PetscFree(il);CHKERRQ(ierr);
1513 
1514   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1515   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1516 
1517   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
1518   if (perm_identity){
1519     (B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1520     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1521     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1522     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1523   } else {
1524     (B)->ops->solve           = MatSolve_SeqSBAIJ_1;
1525     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1526     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1527     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1528   }
1529 
1530   C->assembled    = PETSC_TRUE;
1531   C->preallocated = PETSC_TRUE;
1532   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
1533   if (sctx.nshift){
1534     if (shiftnz) {
1535       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1536     } else if (shiftpd) {
1537       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1538     }
1539   }
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1545 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1546 {
1547   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1548   Mat_SeqSBAIJ       *b;
1549   PetscErrorCode     ierr;
1550   PetscTruth         perm_identity,missing;
1551   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui;
1552   const PetscInt     *rip,*riip;
1553   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1554   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1555   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1556   PetscReal          fill=info->fill,levels=info->levels;
1557   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1558   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1559   PetscBT            lnkbt;
1560   IS                 iperm;
1561 
1562   PetscFunctionBegin;
1563   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1564   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1565   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1566   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1567   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1568 
1569   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1570   ui[0] = 0;
1571 
1572   /* ICC(0) without matrix ordering: simply copies fill pattern */
1573   if (!levels && perm_identity) {
1574 
1575     for (i=0; i<am; i++) {
1576       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1577     }
1578     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1579     cols = uj;
1580     for (i=0; i<am; i++) {
1581       aj    = a->j + a->diag[i];
1582       ncols = ui[i+1] - ui[i];
1583       for (j=0; j<ncols; j++) *cols++ = *aj++;
1584     }
1585   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1586     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1587     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1588 
1589     /* initialization */
1590     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1591 
1592     /* jl: linked list for storing indices of the pivot rows
1593        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1594     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1595     il         = jl + am;
1596     uj_ptr     = (PetscInt**)(il + am);
1597     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1598     for (i=0; i<am; i++){
1599       jl[i] = am; il[i] = 0;
1600     }
1601 
1602     /* create and initialize a linked list for storing column indices of the active row k */
1603     nlnk = am + 1;
1604     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1605 
1606     /* initial FreeSpace size is fill*(ai[am]+1) */
1607     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1608     current_space = free_space;
1609     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1610     current_space_lvl = free_space_lvl;
1611 
1612     for (k=0; k<am; k++){  /* for each active row k */
1613       /* initialize lnk by the column indices of row rip[k] of A */
1614       nzk   = 0;
1615       ncols = ai[rip[k]+1] - ai[rip[k]];
1616       if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1617       ncols_upper = 0;
1618       for (j=0; j<ncols; j++){
1619         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1620         if (riip[i] >= k){ /* only take upper triangular entry */
1621           ajtmp[ncols_upper] = i;
1622           ncols_upper++;
1623         }
1624       }
1625       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1626       nzk += nlnk;
1627 
1628       /* update lnk by computing fill-in for each pivot row to be merged in */
1629       prow = jl[k]; /* 1st pivot row */
1630 
1631       while (prow < k){
1632         nextprow = jl[prow];
1633 
1634         /* merge prow into k-th row */
1635         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1636         jmax = ui[prow+1];
1637         ncols = jmax-jmin;
1638         i     = jmin - ui[prow];
1639         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1640         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1641         j     = *(uj - 1);
1642         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1643         nzk += nlnk;
1644 
1645         /* update il and jl for prow */
1646         if (jmin < jmax){
1647           il[prow] = jmin;
1648           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1649         }
1650         prow = nextprow;
1651       }
1652 
1653       /* if free space is not available, make more free space */
1654       if (current_space->local_remaining<nzk) {
1655         i = am - k + 1; /* num of unfactored rows */
1656         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1657         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1658         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1659         reallocs++;
1660       }
1661 
1662       /* copy data into free_space and free_space_lvl, then initialize lnk */
1663       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1664       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1665 
1666       /* add the k-th row into il and jl */
1667       if (nzk > 1){
1668         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1669         jl[k] = jl[i]; jl[i] = k;
1670         il[k] = ui[k] + 1;
1671       }
1672       uj_ptr[k]     = current_space->array;
1673       uj_lvl_ptr[k] = current_space_lvl->array;
1674 
1675       current_space->array           += nzk;
1676       current_space->local_used      += nzk;
1677       current_space->local_remaining -= nzk;
1678 
1679       current_space_lvl->array           += nzk;
1680       current_space_lvl->local_used      += nzk;
1681       current_space_lvl->local_remaining -= nzk;
1682 
1683       ui[k+1] = ui[k] + nzk;
1684     }
1685 
1686 #if defined(PETSC_USE_INFO)
1687     if (ai[am] != 0) {
1688       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1689       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1690       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1691       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1692     } else {
1693       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1694     }
1695 #endif
1696 
1697     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1698     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1699     ierr = PetscFree(jl);CHKERRQ(ierr);
1700     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1701 
1702     /* destroy list of free space and other temporary array(s) */
1703     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1704     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1705     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1706     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1707 
1708   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1709 
1710   /* put together the new matrix in MATSEQSBAIJ format */
1711 
1712   b    = (Mat_SeqSBAIJ*)(fact)->data;
1713   b->singlemalloc = PETSC_FALSE;
1714   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1715   b->j    = uj;
1716   b->i    = ui;
1717   b->diag = 0;
1718   b->ilen = 0;
1719   b->imax = 0;
1720   b->row  = perm;
1721   b->col  = perm;
1722   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1723   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1724   b->icol = iperm;
1725   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1726   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1727   ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1728   b->maxnz   = b->nz = ui[am];
1729   b->free_a  = PETSC_TRUE;
1730   b->free_ij = PETSC_TRUE;
1731 
1732   (fact)->info.factor_mallocs    = reallocs;
1733   (fact)->info.fill_ratio_given  = fill;
1734   if (ai[am] != 0) {
1735     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1736   } else {
1737     (fact)->info.fill_ratio_needed = 0.0;
1738   }
1739   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1745 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1746 {
1747   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1748   Mat_SeqSBAIJ       *b;
1749   PetscErrorCode     ierr;
1750   PetscTruth         perm_identity;
1751   PetscReal          fill = info->fill;
1752   const PetscInt     *rip,*riip;
1753   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1754   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1755   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1756   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1757   PetscBT            lnkbt;
1758   IS                 iperm;
1759 
1760   PetscFunctionBegin;
1761   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1762   /* check whether perm is the identity mapping */
1763   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1764   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1765   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1766   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1767 
1768   /* initialization */
1769   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1770   ui[0] = 0;
1771 
1772   /* jl: linked list for storing indices of the pivot rows
1773      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1774   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1775   il     = jl + am;
1776   cols   = il + am;
1777   ui_ptr = (PetscInt**)(cols + am);
1778   for (i=0; i<am; i++){
1779     jl[i] = am; il[i] = 0;
1780   }
1781 
1782   /* create and initialize a linked list for storing column indices of the active row k */
1783   nlnk = am + 1;
1784   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1785 
1786   /* initial FreeSpace size is fill*(ai[am]+1) */
1787   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1788   current_space = free_space;
1789 
1790   for (k=0; k<am; k++){  /* for each active row k */
1791     /* initialize lnk by the column indices of row rip[k] of A */
1792     nzk   = 0;
1793     ncols = ai[rip[k]+1] - ai[rip[k]];
1794     if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1795     ncols_upper = 0;
1796     for (j=0; j<ncols; j++){
1797       i = riip[*(aj + ai[rip[k]] + j)];
1798       if (i >= k){ /* only take upper triangular entry */
1799         cols[ncols_upper] = i;
1800         ncols_upper++;
1801       }
1802     }
1803     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1804     nzk += nlnk;
1805 
1806     /* update lnk by computing fill-in for each pivot row to be merged in */
1807     prow = jl[k]; /* 1st pivot row */
1808 
1809     while (prow < k){
1810       nextprow = jl[prow];
1811       /* merge prow into k-th row */
1812       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1813       jmax = ui[prow+1];
1814       ncols = jmax-jmin;
1815       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1816       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1817       nzk += nlnk;
1818 
1819       /* update il and jl for prow */
1820       if (jmin < jmax){
1821         il[prow] = jmin;
1822         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1823       }
1824       prow = nextprow;
1825     }
1826 
1827     /* if free space is not available, make more free space */
1828     if (current_space->local_remaining<nzk) {
1829       i = am - k + 1; /* num of unfactored rows */
1830       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1831       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1832       reallocs++;
1833     }
1834 
1835     /* copy data into free space, then initialize lnk */
1836     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1837 
1838     /* add the k-th row into il and jl */
1839     if (nzk-1 > 0){
1840       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1841       jl[k] = jl[i]; jl[i] = k;
1842       il[k] = ui[k] + 1;
1843     }
1844     ui_ptr[k] = current_space->array;
1845     current_space->array           += nzk;
1846     current_space->local_used      += nzk;
1847     current_space->local_remaining -= nzk;
1848 
1849     ui[k+1] = ui[k] + nzk;
1850   }
1851 
1852 #if defined(PETSC_USE_INFO)
1853   if (ai[am] != 0) {
1854     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1855     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1856     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1857     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1858   } else {
1859      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1860   }
1861 #endif
1862 
1863   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1864   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1865   ierr = PetscFree(jl);CHKERRQ(ierr);
1866 
1867   /* destroy list of free space and other temporary array(s) */
1868   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1869   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1870   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1871 
1872   /* put together the new matrix in MATSEQSBAIJ format */
1873 
1874   b = (Mat_SeqSBAIJ*)(fact)->data;
1875   b->singlemalloc = PETSC_FALSE;
1876   b->free_a       = PETSC_TRUE;
1877   b->free_ij      = PETSC_TRUE;
1878   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1879   b->j    = uj;
1880   b->i    = ui;
1881   b->diag = 0;
1882   b->ilen = 0;
1883   b->imax = 0;
1884   b->row  = perm;
1885   b->col  = perm;
1886   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1887   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1888   b->icol = iperm;
1889   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1890   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1891   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1892   b->maxnz = b->nz = ui[am];
1893 
1894   (fact)->info.factor_mallocs    = reallocs;
1895   (fact)->info.fill_ratio_given  = fill;
1896   if (ai[am] != 0) {
1897     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1898   } else {
1899     (fact)->info.fill_ratio_needed = 0.0;
1900   }
1901   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1902   PetscFunctionReturn(0);
1903 }
1904