xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision bcddec3d902ae35bfc71108667abc18931194fcc)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*);
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   const PetscInt *c,*r,*ic;
64   PetscInt       i,n = A->rmap->n,*cc,*cr;
65   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
66   PetscInt       *ordcol,*iwk,*iperm,*jw;
67   PetscInt       jmax,lfill,job,*o_i,*o_j;
68   MatScalar      *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
69   PetscReal      af,dt,dtcount,dtcol,fill;
70 
71   PetscFunctionBegin;
72 
73   if (info->dt == PETSC_DEFAULT)      dt      = .005; else dt = info->dt;
74   if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);  else dtcount = info->dtcount;
75   if (info->dtcol == PETSC_DEFAULT)   dtcol   = .01; else dtcol = info->dtcol;
76   if (info->fill == PETSC_DEFAULT)    fill    = ((double)(n*(info->dtcount+1)))/a->nz; else fill = info->fill;
77   lfill   = (PetscInt)(dtcount/2.0);
78   jmax    = (PetscInt)(fill*a->nz);
79 
80 
81   /* ------------------------------------------------------------
82      If reorder=.TRUE., then the original matrix has to be
83      reordered to reflect the user selected ordering scheme, and
84      then de-reordered so it is in it's original format.
85      Because Saad's dperm() is NOT in place, we have to copy
86      the original matrix and allocate more storage. . .
87      ------------------------------------------------------------
88   */
89 
90   /* set reorder to true if either isrow or iscol is not identity */
91   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
92   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
93   reorder = PetscNot(reorder);
94 
95 
96   /* storage for ilu factor */
97   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr);
100   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
101 
102   /* ------------------------------------------------------------
103      Make sure that everything is Fortran formatted (1-Based)
104      ------------------------------------------------------------
105   */
106   for (i=old_i[0];i<old_i[n];i++) {
107     old_j[i]++;
108   }
109   for(i=0;i<n+1;i++) {
110     old_i[i]++;
111   };
112 
113 
114   if (reorder) {
115     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
116     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
117     ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       cr[i]  = r[i]+1;
120       cc[i]  = c[i]+1;
121     }
122     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
123     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
124     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
125     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
126     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr);
127     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job);
128     ierr = PetscFree2(cc,cr);CHKERRQ(ierr);
129     o_a = old_a2;
130     o_j = old_j2;
131     o_i = old_i2;
132   } else {
133     o_a = old_a;
134     o_j = old_j;
135     o_i = old_i;
136   }
137 
138   /* ------------------------------------------------------------
139      Call Saad's ilutp() routine to generate the factorization
140      ------------------------------------------------------------
141   */
142 
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
145   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
146 
147   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148   if (sierr) {
149     switch (sierr) {
150       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax);
151       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax);
152       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
153       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
154       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax);
155       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
156     }
157   }
158 
159   ierr = PetscFree(w);CHKERRQ(ierr);
160   ierr = PetscFree(jw);CHKERRQ(ierr);
161 
162   /* ------------------------------------------------------------
163      Saad's routine gives the result in Modified Sparse Row (msr)
164      Convert to Compressed Sparse Row format (csr)
165      ------------------------------------------------------------
166   */
167 
168   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
169   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
170 
171   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
172 
173   ierr = PetscFree(iwk);CHKERRQ(ierr);
174   ierr = PetscFree(wk);CHKERRQ(ierr);
175 
176   if (reorder) {
177     ierr = PetscFree(old_a2);CHKERRQ(ierr);
178     ierr = PetscFree(old_j2);CHKERRQ(ierr);
179     ierr = PetscFree(old_i2);CHKERRQ(ierr);
180   } else {
181     /* fix permutation of old_j that the factorization introduced */
182     for (i=old_i[0]; i<old_i[n]; i++) {
183       old_j[i-1] = iperm[old_j[i-1]-1];
184     }
185   }
186 
187   /* get rid of the shift to indices starting at 1 */
188   for (i=0; i<n+1; i++) {
189     old_i[i]--;
190   }
191   for (i=old_i[0];i<old_i[n];i++) {
192     old_j[i]--;
193   }
194 
195   /* Make the factored matrix 0-based */
196   for (i=0; i<n+1; i++) {
197     new_i[i]--;
198   }
199   for (i=new_i[0];i<new_i[n];i++) {
200     new_j[i]--;
201   }
202 
203   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204   /*-- permute the right-hand-side and solution vectors           --*/
205   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
206   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
207   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
208   for(i=0; i<n; i++) {
209     ordcol[i] = ic[iperm[i]-1];
210   };
211   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
212   ierr = ISDestroy(isicol);CHKERRQ(ierr);
213 
214   ierr = PetscFree(iperm);CHKERRQ(ierr);
215 
216   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
217   ierr = PetscFree(ordcol);CHKERRQ(ierr);
218 
219   /*----- put together the new matrix -----*/
220 
221   ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr);
222   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
223   ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
224   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
225   (*fact)->factor    = MAT_FACTOR_LU;
226   (*fact)->assembled = PETSC_TRUE;
227 
228   b = (Mat_SeqAIJ*)(*fact)->data;
229   b->free_a        = PETSC_TRUE;
230   b->free_ij       = PETSC_TRUE;
231   b->singlemalloc  = PETSC_FALSE;
232   b->a             = new_a;
233   b->j             = new_j;
234   b->i             = new_i;
235   b->ilen          = 0;
236   b->imax          = 0;
237   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
238   b->row           = isirow;
239   b->col           = iscolf;
240   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
241   b->maxnz = b->nz = new_i[n];
242   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
243   (*fact)->info.factor_mallocs = 0;
244 
245   af = ((double)b->nz)/((double)a->nz) + .001;
246   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr);
247   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
248   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
249   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
250 
251   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
252 
253   PetscFunctionReturn(0);
254 #endif
255 }
256 
257 EXTERN_C_BEGIN
258 #undef __FUNCT__
259 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
260 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
261 {
262   PetscFunctionBegin;
263   *flg = PETSC_TRUE;
264   PetscFunctionReturn(0);
265 }
266 EXTERN_C_END
267 
268 EXTERN_C_BEGIN
269 #undef __FUNCT__
270 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
271 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
272 {
273   PetscInt           n = A->rmap->n;
274   PetscErrorCode     ierr;
275 
276   PetscFunctionBegin;
277   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
278   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
279   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
280     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
281     (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
282     (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ;
283   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
284     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
285     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
286     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
287     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
288   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
289   (*B)->factor = ftype;
290   PetscFunctionReturn(0);
291 }
292 EXTERN_C_END
293 
294 #undef __FUNCT__
295 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
296 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
297 {
298   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
299   IS                 isicol;
300   PetscErrorCode     ierr;
301   const PetscInt     *r,*ic;
302   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
303   PetscInt           *bi,*bj,*ajtmp;
304   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
305   PetscReal          f;
306   PetscInt           nlnk,*lnk,k,**bi_ptr;
307   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
308   PetscBT            lnkbt;
309 
310   PetscFunctionBegin;
311   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
312   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
313   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
314   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
315 
316   /* get new row pointers */
317   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
318   bi[0] = 0;
319 
320   /* bdiag is location of diagonal in factor */
321   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
322   bdiag[0] = 0;
323 
324   /* linked list for storing column indices of the active row */
325   nlnk = n + 1;
326   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
327 
328   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
329 
330   /* initial FreeSpace size is f*(ai[n]+1) */
331   f = info->fill;
332   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
333   current_space = free_space;
334 
335   for (i=0; i<n; i++) {
336     /* copy previous fill into linked list */
337     nzi = 0;
338     nnz = ai[r[i]+1] - ai[r[i]];
339     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
340     ajtmp = aj + ai[r[i]];
341     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
342     nzi += nlnk;
343 
344     /* add pivot rows into linked list */
345     row = lnk[n];
346     while (row < i) {
347       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
348       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
349       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
350       nzi += nlnk;
351       row  = lnk[row];
352     }
353     bi[i+1] = bi[i] + nzi;
354     im[i]   = nzi;
355 
356     /* mark bdiag */
357     nzbd = 0;
358     nnz  = nzi;
359     k    = lnk[n];
360     while (nnz-- && k < i){
361       nzbd++;
362       k = lnk[k];
363     }
364     bdiag[i] = bi[i] + nzbd;
365 
366     /* if free space is not available, make more free space */
367     if (current_space->local_remaining<nzi) {
368       nnz = (n - i)*nzi; /* estimated and max additional space needed */
369       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
370       reallocs++;
371     }
372 
373     /* copy data into free space, then initialize lnk */
374     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
375     bi_ptr[i] = current_space->array;
376     current_space->array           += nzi;
377     current_space->local_used      += nzi;
378     current_space->local_remaining -= nzi;
379   }
380 #if defined(PETSC_USE_INFO)
381   if (ai[n] != 0) {
382     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
383     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
384     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
385     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
386     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
387   } else {
388     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
389   }
390 #endif
391 
392   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
393   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
394 
395   /* destroy list of free space and other temporary array(s) */
396   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
397   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
398   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
399   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
400 
401   /* put together the new matrix */
402   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
403   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
404   b    = (Mat_SeqAIJ*)(B)->data;
405   b->free_a       = PETSC_TRUE;
406   b->free_ij      = PETSC_TRUE;
407   b->singlemalloc = PETSC_FALSE;
408   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
409   b->j          = bj;
410   b->i          = bi;
411   b->diag       = bdiag;
412   b->ilen       = 0;
413   b->imax       = 0;
414   b->row        = isrow;
415   b->col        = iscol;
416   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
417   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
418   b->icol       = isicol;
419   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
420 
421   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
422   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
423   b->maxnz = b->nz = bi[n] ;
424 
425   (B)->factor                 =  MAT_FACTOR_LU;
426   (B)->info.factor_mallocs    = reallocs;
427   (B)->info.fill_ratio_given  = f;
428 
429   if (ai[n] != 0) {
430     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
431   } else {
432     (B)->info.fill_ratio_needed = 0.0;
433   }
434   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
435   (B)->ops->solve            = MatSolve_SeqAIJ;
436   (B)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ;
437   /* switch to inodes if appropriate */
438   ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 /*
443     Trouble in factorization, should we dump the original matrix?
444 */
445 #undef __FUNCT__
446 #define __FUNCT__ "MatFactorDumpMatrix"
447 PetscErrorCode MatFactorDumpMatrix(Mat A)
448 {
449   PetscErrorCode ierr;
450   PetscTruth     flg;
451 
452   PetscFunctionBegin;
453   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr);
454   if (flg) {
455     PetscViewer viewer;
456     char        filename[PETSC_MAX_PATH_LEN];
457 
458     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
459     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
460     ierr = MatView(A,viewer);CHKERRQ(ierr);
461     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
462   }
463   PetscFunctionReturn(0);
464 }
465 
466 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec);
467 
468 /* ----------------------------------------------------------- */
469 #undef __FUNCT__
470 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
471 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
472 {
473   Mat            C=B;
474   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
475   IS             isrow = b->row,isicol = b->icol;
476   PetscErrorCode ierr;
477   const PetscInt  *r,*ic,*ics;
478   PetscInt       i,j,n=A->rmap->n,*bi=b->i,*bj=b->j;
479   PetscInt       *ajtmp,*bjtmp,nz,row;
480   PetscInt       *diag_offset = b->diag,diag,*pj;
481   PetscScalar    *rtmp,*pc,multiplier,*rtmps;
482   MatScalar      *v,*pv;
483   PetscScalar    d;
484   PetscReal      rs;
485   LUShift_Ctx    sctx;
486   PetscInt       newshift,*ddiag;
487 
488   PetscFunctionBegin;
489   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
490   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
491   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
492   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
493   rtmps = rtmp; ics = ic;
494 
495   sctx.shift_top      = 0;
496   sctx.nshift_max     = 0;
497   sctx.shift_lo       = 0;
498   sctx.shift_hi       = 0;
499   sctx.shift_fraction = 0;
500 
501   /* if both shift schemes are chosen by user, only use info->shiftpd */
502   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
503     PetscInt *aai = a->i;
504     ddiag          = a->diag;
505     sctx.shift_top = 0;
506     for (i=0; i<n; i++) {
507       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
508       d  = (a->a)[ddiag[i]];
509       rs = -PetscAbsScalar(d) - PetscRealPart(d);
510       v  = a->a+aai[i];
511       nz = aai[i+1] - aai[i];
512       for (j=0; j<nz; j++)
513 	rs += PetscAbsScalar(v[j]);
514       if (rs>sctx.shift_top) sctx.shift_top = rs;
515     }
516     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
517     sctx.shift_top    *= 1.1;
518     sctx.nshift_max   = 5;
519     sctx.shift_lo     = 0.;
520     sctx.shift_hi     = 1.;
521   }
522 
523   sctx.shift_amount = 0;
524   sctx.nshift       = 0;
525   do {
526     sctx.lushift = PETSC_FALSE;
527     for (i=0; i<n; i++){
528       nz    = bi[i+1] - bi[i];
529       bjtmp = bj + bi[i];
530       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
531 
532       /* load in initial (unfactored row) */
533       nz    = a->i[r[i]+1] - a->i[r[i]];
534       ajtmp = a->j + a->i[r[i]];
535       v     = a->a + a->i[r[i]];
536       for (j=0; j<nz; j++) {
537         rtmp[ics[ajtmp[j]]] = v[j];
538       }
539       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
540 
541       row = *bjtmp++;
542       while  (row < i) {
543         pc = rtmp + row;
544         if (*pc != 0.0) {
545           pv         = b->a + diag_offset[row];
546           pj         = b->j + diag_offset[row] + 1;
547           multiplier = *pc / *pv++;
548           *pc        = multiplier;
549           nz         = bi[row+1] - diag_offset[row] - 1;
550           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
551           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
552         }
553         row = *bjtmp++;
554       }
555       /* finished row so stick it into b->a */
556       pv   = b->a + bi[i] ;
557       pj   = b->j + bi[i] ;
558       nz   = bi[i+1] - bi[i];
559       diag = diag_offset[i] - bi[i];
560       rs   = 0.0;
561       for (j=0; j<nz; j++) {
562         pv[j] = rtmps[pj[j]];
563         if (j != diag) rs += PetscAbsScalar(pv[j]);
564       }
565 
566       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
567       sctx.rs  = rs;
568       sctx.pv  = pv[diag];
569       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
570       if (newshift == 1) break;
571     }
572 
573     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
574       /*
575        * if no shift in this attempt & shifting & started shifting & can refine,
576        * then try lower shift
577        */
578       sctx.shift_hi        = sctx.shift_fraction;
579       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
580       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
581       sctx.lushift         = PETSC_TRUE;
582       sctx.nshift++;
583     }
584   } while (sctx.lushift);
585 
586   /* invert diagonal entries for simplier triangular solves */
587   for (i=0; i<n; i++) {
588     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
589   }
590   ierr = PetscFree(rtmp);CHKERRQ(ierr);
591   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
592   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
593   if (b->inode.use) {
594     C->ops->solve   = MatSolve_Inode;
595   } else {
596     PetscTruth row_identity, col_identity;
597     ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
598     ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
599     if (row_identity && col_identity) {
600       C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
601     } else {
602       C->ops->solve   = MatSolve_SeqAIJ;
603     }
604   }
605   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
606   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
607   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
608   C->ops->matsolve           = MatMatSolve_SeqAIJ;
609   C->assembled    = PETSC_TRUE;
610   C->preallocated = PETSC_TRUE;
611   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
612   if (sctx.nshift){
613      if (info->shiftpd) {
614       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
615     } else if (info->shiftnz) {
616       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
617     }
618   }
619   PetscFunctionReturn(0);
620 }
621 
622 /*
623    This routine implements inplace ILU(0) with row or/and column permutations.
624    Input:
625      A - original matrix
626    Output;
627      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
628          a->j (col index) is permuted by the inverse of colperm, then sorted
629          a->a reordered accordingly with a->j
630          a->diag (ptr to diagonal elements) is updated.
631 */
632 #undef __FUNCT__
633 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
634 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
635 {
636   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
637   IS             isrow = a->row,isicol = a->icol;
638   PetscErrorCode ierr;
639   const PetscInt *r,*ic,*ics;
640   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
641   PetscInt       *ajtmp,nz,row;
642   PetscInt       *diag = a->diag,nbdiag,*pj;
643   PetscScalar    *rtmp,*pc,multiplier,d;
644   MatScalar      *v,*pv;
645   PetscReal      rs;
646   LUShift_Ctx    sctx;
647   PetscInt       newshift;
648 
649   PetscFunctionBegin;
650   if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
651   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
652   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
653   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
654   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
655   ics = ic;
656 
657   sctx.shift_top      = 0;
658   sctx.nshift_max     = 0;
659   sctx.shift_lo       = 0;
660   sctx.shift_hi       = 0;
661   sctx.shift_fraction = 0;
662 
663   /* if both shift schemes are chosen by user, only use info->shiftpd */
664   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
665     sctx.shift_top = 0;
666     for (i=0; i<n; i++) {
667       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
668       d  = (a->a)[diag[i]];
669       rs = -PetscAbsScalar(d) - PetscRealPart(d);
670       v  = a->a+ai[i];
671       nz = ai[i+1] - ai[i];
672       for (j=0; j<nz; j++)
673 	rs += PetscAbsScalar(v[j]);
674       if (rs>sctx.shift_top) sctx.shift_top = rs;
675     }
676     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
677     sctx.shift_top    *= 1.1;
678     sctx.nshift_max   = 5;
679     sctx.shift_lo     = 0.;
680     sctx.shift_hi     = 1.;
681   }
682 
683   sctx.shift_amount = 0;
684   sctx.nshift       = 0;
685   do {
686     sctx.lushift = PETSC_FALSE;
687     for (i=0; i<n; i++){
688       /* load in initial unfactored row */
689       nz    = ai[r[i]+1] - ai[r[i]];
690       ajtmp = aj + ai[r[i]];
691       v     = a->a + ai[r[i]];
692       /* sort permuted ajtmp and values v accordingly */
693       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
694       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
695 
696       diag[r[i]] = ai[r[i]];
697       for (j=0; j<nz; j++) {
698         rtmp[ajtmp[j]] = v[j];
699         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
700       }
701       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
702 
703       row = *ajtmp++;
704       while  (row < i) {
705         pc = rtmp + row;
706         if (*pc != 0.0) {
707           pv         = a->a + diag[r[row]];
708           pj         = aj + diag[r[row]] + 1;
709 
710           multiplier = *pc / *pv++;
711           *pc        = multiplier;
712           nz         = ai[r[row]+1] - diag[r[row]] - 1;
713           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
714           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
715         }
716         row = *ajtmp++;
717       }
718       /* finished row so overwrite it onto a->a */
719       pv   = a->a + ai[r[i]] ;
720       pj   = aj + ai[r[i]] ;
721       nz   = ai[r[i]+1] - ai[r[i]];
722       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
723 
724       rs   = 0.0;
725       for (j=0; j<nz; j++) {
726         pv[j] = rtmp[pj[j]];
727         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
728       }
729 
730       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
731       sctx.rs  = rs;
732       sctx.pv  = pv[nbdiag];
733       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
734       if (newshift == 1) break;
735     }
736 
737     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
738       /*
739        * if no shift in this attempt & shifting & started shifting & can refine,
740        * then try lower shift
741        */
742       sctx.shift_hi        = sctx.shift_fraction;
743       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
744       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
745       sctx.lushift         = PETSC_TRUE;
746       sctx.nshift++;
747     }
748   } while (sctx.lushift);
749 
750   /* invert diagonal entries for simplier triangular solves */
751   for (i=0; i<n; i++) {
752     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
753   }
754 
755   ierr = PetscFree(rtmp);CHKERRQ(ierr);
756   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
757   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
758   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
759   A->ops->solveadd          = MatSolveAdd_SeqAIJ;
760   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
761   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
762   A->assembled = PETSC_TRUE;
763   A->preallocated = PETSC_TRUE;
764   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
765   if (sctx.nshift){
766     if (info->shiftpd) {
767       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
768     } else if (info->shiftnz) {
769       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
770     }
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 /* ----------------------------------------------------------- */
776 #undef __FUNCT__
777 #define __FUNCT__ "MatLUFactor_SeqAIJ"
778 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
779 {
780   PetscErrorCode ierr;
781   Mat            C;
782 
783   PetscFunctionBegin;
784   ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
785   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
786   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
787   A->ops->solve            = C->ops->solve;
788   A->ops->solvetranspose   = C->ops->solvetranspose;
789   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
790   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
791   PetscFunctionReturn(0);
792 }
793 /* ----------------------------------------------------------- */
794 #undef __FUNCT__
795 #define __FUNCT__ "MatSolve_SeqAIJ"
796 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
797 {
798   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
799   IS                iscol = a->col,isrow = a->row;
800   PetscErrorCode    ierr;
801   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
802   PetscInt          nz;
803   const PetscInt    *rout,*cout,*r,*c;
804   PetscScalar       *x,*tmp,*tmps,sum;
805   const PetscScalar *b;
806   const MatScalar   *aa = a->a,*v;
807 
808   PetscFunctionBegin;
809   if (!n) PetscFunctionReturn(0);
810 
811   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
812   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
813   tmp  = a->solve_work;
814 
815   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
816   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
817 
818   /* forward solve the lower triangular */
819   tmp[0] = b[*r++];
820   tmps   = tmp;
821   for (i=1; i<n; i++) {
822     v   = aa + ai[i] ;
823     vi  = aj + ai[i] ;
824     nz  = a->diag[i] - ai[i];
825     sum = b[*r++];
826     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
827     tmp[i] = sum;
828   }
829 
830   /* backward solve the upper triangular */
831   for (i=n-1; i>=0; i--){
832     v   = aa + a->diag[i] + 1;
833     vi  = aj + a->diag[i] + 1;
834     nz  = ai[i+1] - a->diag[i] - 1;
835     sum = tmp[i];
836     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
837     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
838   }
839 
840   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
841   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
842   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
843   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
844   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatMatSolve_SeqAIJ"
850 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
851 {
852   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
853   IS              iscol = a->col,isrow = a->row;
854   PetscErrorCode  ierr;
855   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
856   PetscInt        nz,neq;
857   const PetscInt  *rout,*cout,*r,*c;
858   PetscScalar     *x,*b,*tmp,*tmps,sum;
859   const MatScalar *aa = a->a,*v;
860   PetscTruth      bisdense,xisdense;
861 
862   PetscFunctionBegin;
863   if (!n) PetscFunctionReturn(0);
864 
865   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
866   if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
867   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
868   if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
869 
870   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
871   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
872 
873   tmp  = a->solve_work;
874   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
875   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
876 
877   for (neq=0; neq<B->cmap->n; neq++){
878     /* forward solve the lower triangular */
879     tmp[0] = b[r[0]];
880     tmps   = tmp;
881     for (i=1; i<n; i++) {
882       v   = aa + ai[i] ;
883       vi  = aj + ai[i] ;
884       nz  = a->diag[i] - ai[i];
885       sum = b[r[i]];
886       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
887       tmp[i] = sum;
888     }
889     /* backward solve the upper triangular */
890     for (i=n-1; i>=0; i--){
891       v   = aa + a->diag[i] + 1;
892       vi  = aj + a->diag[i] + 1;
893       nz  = ai[i+1] - a->diag[i] - 1;
894       sum = tmp[i];
895       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
896       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
897     }
898 
899     b += n;
900     x += n;
901   }
902   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
903   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
904   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
905   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
906   ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 #undef __FUNCT__
911 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
912 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
913 {
914   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
915   IS              iscol = a->col,isrow = a->row;
916   PetscErrorCode  ierr;
917   const PetscInt  *r,*c,*rout,*cout;
918   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
919   PetscInt        nz,row;
920   PetscScalar     *x,*b,*tmp,*tmps,sum;
921   const MatScalar *aa = a->a,*v;
922 
923   PetscFunctionBegin;
924   if (!n) PetscFunctionReturn(0);
925 
926   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
927   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
928   tmp  = a->solve_work;
929 
930   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
931   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
932 
933   /* forward solve the lower triangular */
934   tmp[0] = b[*r++];
935   tmps   = tmp;
936   for (row=1; row<n; row++) {
937     i   = rout[row]; /* permuted row */
938     v   = aa + ai[i] ;
939     vi  = aj + ai[i] ;
940     nz  = a->diag[i] - ai[i];
941     sum = b[*r++];
942     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
943     tmp[row] = sum;
944   }
945 
946   /* backward solve the upper triangular */
947   for (row=n-1; row>=0; row--){
948     i   = rout[row]; /* permuted row */
949     v   = aa + a->diag[i] + 1;
950     vi  = aj + a->diag[i] + 1;
951     nz  = ai[i+1] - a->diag[i] - 1;
952     sum = tmp[row];
953     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
954     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
955   }
956 
957   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
958   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
959   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
960   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
961   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 
965 /* ----------------------------------------------------------- */
966 #undef __FUNCT__
967 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
968 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
969 {
970   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
971   PetscErrorCode    ierr;
972   PetscInt          n = A->rmap->n;
973   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
974   PetscScalar       *x;
975   const PetscScalar *b;
976   const MatScalar   *aa = a->a;
977 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
978   PetscInt          adiag_i,i,nz,ai_i;
979   const MatScalar   *v;
980   PetscScalar       sum;
981 #endif
982 
983   PetscFunctionBegin;
984   if (!n) PetscFunctionReturn(0);
985 
986   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
987   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
988 
989 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
990   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
991 #else
992   /* forward solve the lower triangular */
993   x[0] = b[0];
994   for (i=1; i<n; i++) {
995     ai_i = ai[i];
996     v    = aa + ai_i;
997     vi   = aj + ai_i;
998     nz   = adiag[i] - ai_i;
999     sum  = b[i];
1000     while (nz--) sum -= *v++ * x[*vi++];
1001     x[i] = sum;
1002   }
1003 
1004   /* backward solve the upper triangular */
1005   for (i=n-1; i>=0; i--){
1006     adiag_i = adiag[i];
1007     v       = aa + adiag_i + 1;
1008     vi      = aj + adiag_i + 1;
1009     nz      = ai[i+1] - adiag_i - 1;
1010     sum     = x[i];
1011     while (nz--) sum -= *v++ * x[*vi++];
1012     x[i]    = sum*aa[adiag_i];
1013   }
1014 #endif
1015   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
1016   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1017   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1023 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1024 {
1025   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1026   IS              iscol = a->col,isrow = a->row;
1027   PetscErrorCode  ierr;
1028   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1029   PetscInt        nz;
1030   const PetscInt  *rout,*cout,*r,*c;
1031   PetscScalar     *x,*b,*tmp,sum;
1032   const MatScalar *aa = a->a,*v;
1033 
1034   PetscFunctionBegin;
1035   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1036 
1037   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1038   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1039   tmp  = a->solve_work;
1040 
1041   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1042   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1043 
1044   /* forward solve the lower triangular */
1045   tmp[0] = b[*r++];
1046   for (i=1; i<n; i++) {
1047     v   = aa + ai[i] ;
1048     vi  = aj + ai[i] ;
1049     nz  = a->diag[i] - ai[i];
1050     sum = b[*r++];
1051     while (nz--) sum -= *v++ * tmp[*vi++ ];
1052     tmp[i] = sum;
1053   }
1054 
1055   /* backward solve the upper triangular */
1056   for (i=n-1; i>=0; i--){
1057     v   = aa + a->diag[i] + 1;
1058     vi  = aj + a->diag[i] + 1;
1059     nz  = ai[i+1] - a->diag[i] - 1;
1060     sum = tmp[i];
1061     while (nz--) sum -= *v++ * tmp[*vi++ ];
1062     tmp[i] = sum*aa[a->diag[i]];
1063     x[*c--] += tmp[i];
1064   }
1065 
1066   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1067   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1068   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1069   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1070   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1071 
1072   PetscFunctionReturn(0);
1073 }
1074 /* -------------------------------------------------------------------*/
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1077 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1078 {
1079   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1080   IS              iscol = a->col,isrow = a->row;
1081   PetscErrorCode  ierr;
1082   const PetscInt  *rout,*cout,*r,*c;
1083   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1084   PetscInt        nz,*diag = a->diag;
1085   PetscScalar     *x,*b,*tmp,s1;
1086   const MatScalar *aa = a->a,*v;
1087 
1088   PetscFunctionBegin;
1089   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1090   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1091   tmp  = a->solve_work;
1092 
1093   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1094   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1095 
1096   /* copy the b into temp work space according to permutation */
1097   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1098 
1099   /* forward solve the U^T */
1100   for (i=0; i<n; i++) {
1101     v   = aa + diag[i] ;
1102     vi  = aj + diag[i] + 1;
1103     nz  = ai[i+1] - diag[i] - 1;
1104     s1  = tmp[i];
1105     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1106     while (nz--) {
1107       tmp[*vi++ ] -= (*v++)*s1;
1108     }
1109     tmp[i] = s1;
1110   }
1111 
1112   /* backward solve the L^T */
1113   for (i=n-1; i>=0; i--){
1114     v   = aa + diag[i] - 1 ;
1115     vi  = aj + diag[i] - 1 ;
1116     nz  = diag[i] - ai[i];
1117     s1  = tmp[i];
1118     while (nz--) {
1119       tmp[*vi-- ] -= (*v--)*s1;
1120     }
1121   }
1122 
1123   /* copy tmp into x according to permutation */
1124   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1125 
1126   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1127   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1128   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1129   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1130 
1131   ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1137 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1138 {
1139   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1140   IS              iscol = a->col,isrow = a->row;
1141   PetscErrorCode  ierr;
1142   const PetscInt  *r,*c,*rout,*cout;
1143   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1144   PetscInt        nz,*diag = a->diag;
1145   PetscScalar     *x,*b,*tmp;
1146   const MatScalar *aa = a->a,*v;
1147 
1148   PetscFunctionBegin;
1149   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1150 
1151   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1153   tmp = a->solve_work;
1154 
1155   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1156   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1157 
1158   /* copy the b into temp work space according to permutation */
1159   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1160 
1161   /* forward solve the U^T */
1162   for (i=0; i<n; i++) {
1163     v   = aa + diag[i] ;
1164     vi  = aj + diag[i] + 1;
1165     nz  = ai[i+1] - diag[i] - 1;
1166     tmp[i] *= *v++;
1167     while (nz--) {
1168       tmp[*vi++ ] -= (*v++)*tmp[i];
1169     }
1170   }
1171 
1172   /* backward solve the L^T */
1173   for (i=n-1; i>=0; i--){
1174     v   = aa + diag[i] - 1 ;
1175     vi  = aj + diag[i] - 1 ;
1176     nz  = diag[i] - ai[i];
1177     while (nz--) {
1178       tmp[*vi-- ] -= (*v--)*tmp[i];
1179     }
1180   }
1181 
1182   /* copy tmp into x according to permutation */
1183   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1184 
1185   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1186   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1187   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1188   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1189 
1190   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 /* ----------------------------------------------------------------*/
1194 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1195 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption);
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1199 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1200 {
1201   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1202   IS                 isicol;
1203   PetscErrorCode     ierr;
1204   const PetscInt     *r,*ic;
1205   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1206   PetscInt           *bi,*cols,nnz,*cols_lvl;
1207   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1208   PetscInt           i,levels,diagonal_fill;
1209   PetscTruth         col_identity,row_identity;
1210   PetscReal          f;
1211   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1212   PetscBT            lnkbt;
1213   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1214   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1215   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1216   PetscTruth         missing;
1217 
1218   PetscFunctionBegin;
1219   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1220   f             = info->fill;
1221   levels        = (PetscInt)info->levels;
1222   diagonal_fill = (PetscInt)info->diagonal_fill;
1223   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1224 
1225   /* special case that simply copies fill pattern */
1226   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1227   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1228   if (!levels && row_identity && col_identity) {
1229     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr);
1230     fact->factor = MAT_FACTOR_ILU;
1231     (fact)->info.factor_mallocs    = 0;
1232     (fact)->info.fill_ratio_given  = info->fill;
1233     (fact)->info.fill_ratio_needed = 1.0;
1234     b               = (Mat_SeqAIJ*)(fact)->data;
1235     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1236     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1237     b->row              = isrow;
1238     b->col              = iscol;
1239     b->icol             = isicol;
1240     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1241     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1242     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1243     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1244     ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1245     PetscFunctionReturn(0);
1246   }
1247 
1248   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1249   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1250 
1251   /* get new row pointers */
1252   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1253   bi[0] = 0;
1254   /* bdiag is location of diagonal in factor */
1255   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1256   bdiag[0]  = 0;
1257 
1258   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1259   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1260 
1261   /* create a linked list for storing column indices of the active row */
1262   nlnk = n + 1;
1263   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1264 
1265   /* initial FreeSpace size is f*(ai[n]+1) */
1266   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1267   current_space = free_space;
1268   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1269   current_space_lvl = free_space_lvl;
1270 
1271   for (i=0; i<n; i++) {
1272     nzi = 0;
1273     /* copy current row into linked list */
1274     nnz  = ai[r[i]+1] - ai[r[i]];
1275     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1276     cols = aj + ai[r[i]];
1277     lnk[i] = -1; /* marker to indicate if diagonal exists */
1278     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1279     nzi += nlnk;
1280 
1281     /* make sure diagonal entry is included */
1282     if (diagonal_fill && lnk[i] == -1) {
1283       fm = n;
1284       while (lnk[fm] < i) fm = lnk[fm];
1285       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1286       lnk[fm]    = i;
1287       lnk_lvl[i] = 0;
1288       nzi++; dcount++;
1289     }
1290 
1291     /* add pivot rows into the active row */
1292     nzbd = 0;
1293     prow = lnk[n];
1294     while (prow < i) {
1295       nnz      = bdiag[prow];
1296       cols     = bj_ptr[prow] + nnz + 1;
1297       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1298       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1299       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1300       nzi += nlnk;
1301       prow = lnk[prow];
1302       nzbd++;
1303     }
1304     bdiag[i] = nzbd;
1305     bi[i+1]  = bi[i] + nzi;
1306 
1307     /* if free space is not available, make more free space */
1308     if (current_space->local_remaining<nzi) {
1309       nnz = nzi*(n - i); /* estimated and max additional space needed */
1310       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1311       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1312       reallocs++;
1313     }
1314 
1315     /* copy data into free_space and free_space_lvl, then initialize lnk */
1316     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1317     bj_ptr[i]    = current_space->array;
1318     bjlvl_ptr[i] = current_space_lvl->array;
1319 
1320     /* make sure the active row i has diagonal entry */
1321     if (*(bj_ptr[i]+bdiag[i]) != i) {
1322       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1323     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1324     }
1325 
1326     current_space->array           += nzi;
1327     current_space->local_used      += nzi;
1328     current_space->local_remaining -= nzi;
1329     current_space_lvl->array           += nzi;
1330     current_space_lvl->local_used      += nzi;
1331     current_space_lvl->local_remaining -= nzi;
1332   }
1333 
1334   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1335   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1336 
1337   /* destroy list of free space and other temporary arrays */
1338   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1339   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1340   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1341   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1342   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1343 
1344 #if defined(PETSC_USE_INFO)
1345   {
1346     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1347     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1348     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1349     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1350     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1351     if (diagonal_fill) {
1352       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1353     }
1354   }
1355 #endif
1356 
1357   /* put together the new matrix */
1358   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1359   b = (Mat_SeqAIJ*)(fact)->data;
1360   b->free_a       = PETSC_TRUE;
1361   b->free_ij      = PETSC_TRUE;
1362   b->singlemalloc = PETSC_FALSE;
1363   len = (bi[n] )*sizeof(PetscScalar);
1364   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1365   b->j          = bj;
1366   b->i          = bi;
1367   for (i=0; i<n; i++) bdiag[i] += bi[i];
1368   b->diag       = bdiag;
1369   b->ilen       = 0;
1370   b->imax       = 0;
1371   b->row        = isrow;
1372   b->col        = iscol;
1373   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1374   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1375   b->icol       = isicol;
1376   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1377   /* In b structure:  Free imax, ilen, old a, old j.
1378      Allocate bdiag, solve_work, new a, new j */
1379   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1380   b->maxnz             = b->nz = bi[n] ;
1381   (fact)->info.factor_mallocs    = reallocs;
1382   (fact)->info.fill_ratio_given  = f;
1383   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1384   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1385   ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #include "src/mat/impls/sbaij/seq/sbaij.h"
1390 #undef __FUNCT__
1391 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1392 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
1393 {
1394   Mat            C = B;
1395   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1396   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1397   IS             ip=b->row,iip = b->icol;
1398   PetscErrorCode ierr;
1399   const PetscInt *rip,*riip;
1400   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
1401   PetscInt       *ai=a->i,*aj=a->j;
1402   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1403   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1404   PetscReal      zeropivot,rs,shiftnz;
1405   PetscReal      shiftpd;
1406   ChShift_Ctx    sctx;
1407   PetscInt       newshift;
1408   PetscTruth     perm_identity;
1409 
1410   PetscFunctionBegin;
1411 
1412   shiftnz   = info->shiftnz;
1413   shiftpd   = info->shiftpd;
1414   zeropivot = info->zeropivot;
1415 
1416   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1417   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1418 
1419   /* initialization */
1420   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1421   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1422   jl   = il + mbs;
1423   rtmp = (MatScalar*)(jl + mbs);
1424 
1425   sctx.shift_amount = 0;
1426   sctx.nshift       = 0;
1427   do {
1428     sctx.chshift = PETSC_FALSE;
1429     for (i=0; i<mbs; i++) {
1430       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1431     }
1432 
1433     for (k = 0; k<mbs; k++){
1434       bval = ba + bi[k];
1435       /* initialize k-th row by the perm[k]-th row of A */
1436       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1437       for (j = jmin; j < jmax; j++){
1438         col = riip[aj[j]];
1439         if (col >= k){ /* only take upper triangular entry */
1440           rtmp[col] = aa[j];
1441           *bval++  = 0.0; /* for in-place factorization */
1442         }
1443       }
1444       /* shift the diagonal of the matrix */
1445       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1446 
1447       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1448       dk = rtmp[k];
1449       i = jl[k]; /* first row to be added to k_th row  */
1450 
1451       while (i < k){
1452         nexti = jl[i]; /* next row to be added to k_th row */
1453 
1454         /* compute multiplier, update diag(k) and U(i,k) */
1455         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1456         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1457         dk += uikdi*ba[ili];
1458         ba[ili] = uikdi; /* -U(i,k) */
1459 
1460         /* add multiple of row i to k-th row */
1461         jmin = ili + 1; jmax = bi[i+1];
1462         if (jmin < jmax){
1463           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1464           /* update il and jl for row i */
1465           il[i] = jmin;
1466           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1467         }
1468         i = nexti;
1469       }
1470 
1471       /* shift the diagonals when zero pivot is detected */
1472       /* compute rs=sum of abs(off-diagonal) */
1473       rs   = 0.0;
1474       jmin = bi[k]+1;
1475       nz   = bi[k+1] - jmin;
1476       bcol = bj + jmin;
1477       while (nz--){
1478         rs += PetscAbsScalar(rtmp[*bcol]);
1479         bcol++;
1480       }
1481 
1482       sctx.rs = rs;
1483       sctx.pv = dk;
1484       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1485 
1486       if (newshift == 1) {
1487         if (!sctx.shift_amount) {
1488           sctx.shift_amount = 1e-5;
1489         }
1490         break;
1491       }
1492 
1493       /* copy data into U(k,:) */
1494       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1495       jmin = bi[k]+1; jmax = bi[k+1];
1496       if (jmin < jmax) {
1497         for (j=jmin; j<jmax; j++){
1498           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1499         }
1500         /* add the k-th row into il and jl */
1501         il[k] = jmin;
1502         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1503       }
1504     }
1505   } while (sctx.chshift);
1506   ierr = PetscFree(il);CHKERRQ(ierr);
1507 
1508   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1509   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1510 
1511   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
1512   if (perm_identity){
1513     (B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1514     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1515     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1516     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1517   } else {
1518     (B)->ops->solve           = MatSolve_SeqSBAIJ_1;
1519     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1520     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1521     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1522   }
1523 
1524   C->assembled    = PETSC_TRUE;
1525   C->preallocated = PETSC_TRUE;
1526   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
1527   if (sctx.nshift){
1528     if (shiftnz) {
1529       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1530     } else if (shiftpd) {
1531       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1532     }
1533   }
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1539 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1540 {
1541   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1542   Mat_SeqSBAIJ       *b;
1543   PetscErrorCode     ierr;
1544   PetscTruth         perm_identity,missing;
1545   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui;
1546   const PetscInt     *rip,*riip;
1547   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1548   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1549   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1550   PetscReal          fill=info->fill,levels=info->levels;
1551   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1552   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1553   PetscBT            lnkbt;
1554   IS                 iperm;
1555 
1556   PetscFunctionBegin;
1557   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1558   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1559   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1560   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1561   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1562 
1563   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1564   ui[0] = 0;
1565 
1566   /* ICC(0) without matrix ordering: simply copies fill pattern */
1567   if (!levels && perm_identity) {
1568 
1569     for (i=0; i<am; i++) {
1570       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1571     }
1572     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1573     cols = uj;
1574     for (i=0; i<am; i++) {
1575       aj    = a->j + a->diag[i];
1576       ncols = ui[i+1] - ui[i];
1577       for (j=0; j<ncols; j++) *cols++ = *aj++;
1578     }
1579   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1580     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1581     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1582 
1583     /* initialization */
1584     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1585 
1586     /* jl: linked list for storing indices of the pivot rows
1587        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1588     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1589     il         = jl + am;
1590     uj_ptr     = (PetscInt**)(il + am);
1591     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1592     for (i=0; i<am; i++){
1593       jl[i] = am; il[i] = 0;
1594     }
1595 
1596     /* create and initialize a linked list for storing column indices of the active row k */
1597     nlnk = am + 1;
1598     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1599 
1600     /* initial FreeSpace size is fill*(ai[am]+1) */
1601     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1602     current_space = free_space;
1603     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1604     current_space_lvl = free_space_lvl;
1605 
1606     for (k=0; k<am; k++){  /* for each active row k */
1607       /* initialize lnk by the column indices of row rip[k] of A */
1608       nzk   = 0;
1609       ncols = ai[rip[k]+1] - ai[rip[k]];
1610       if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1611       ncols_upper = 0;
1612       for (j=0; j<ncols; j++){
1613         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1614         if (riip[i] >= k){ /* only take upper triangular entry */
1615           ajtmp[ncols_upper] = i;
1616           ncols_upper++;
1617         }
1618       }
1619       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1620       nzk += nlnk;
1621 
1622       /* update lnk by computing fill-in for each pivot row to be merged in */
1623       prow = jl[k]; /* 1st pivot row */
1624 
1625       while (prow < k){
1626         nextprow = jl[prow];
1627 
1628         /* merge prow into k-th row */
1629         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1630         jmax = ui[prow+1];
1631         ncols = jmax-jmin;
1632         i     = jmin - ui[prow];
1633         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1634         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1635         j     = *(uj - 1);
1636         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1637         nzk += nlnk;
1638 
1639         /* update il and jl for prow */
1640         if (jmin < jmax){
1641           il[prow] = jmin;
1642           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1643         }
1644         prow = nextprow;
1645       }
1646 
1647       /* if free space is not available, make more free space */
1648       if (current_space->local_remaining<nzk) {
1649         i = am - k + 1; /* num of unfactored rows */
1650         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1651         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1652         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1653         reallocs++;
1654       }
1655 
1656       /* copy data into free_space and free_space_lvl, then initialize lnk */
1657       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1658       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1659 
1660       /* add the k-th row into il and jl */
1661       if (nzk > 1){
1662         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1663         jl[k] = jl[i]; jl[i] = k;
1664         il[k] = ui[k] + 1;
1665       }
1666       uj_ptr[k]     = current_space->array;
1667       uj_lvl_ptr[k] = current_space_lvl->array;
1668 
1669       current_space->array           += nzk;
1670       current_space->local_used      += nzk;
1671       current_space->local_remaining -= nzk;
1672 
1673       current_space_lvl->array           += nzk;
1674       current_space_lvl->local_used      += nzk;
1675       current_space_lvl->local_remaining -= nzk;
1676 
1677       ui[k+1] = ui[k] + nzk;
1678     }
1679 
1680 #if defined(PETSC_USE_INFO)
1681     if (ai[am] != 0) {
1682       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1683       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1684       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1685       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1686     } else {
1687       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1688     }
1689 #endif
1690 
1691     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1692     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1693     ierr = PetscFree(jl);CHKERRQ(ierr);
1694     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1695 
1696     /* destroy list of free space and other temporary array(s) */
1697     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1698     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1699     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1700     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1701 
1702   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1703 
1704   /* put together the new matrix in MATSEQSBAIJ format */
1705 
1706   b    = (Mat_SeqSBAIJ*)(fact)->data;
1707   b->singlemalloc = PETSC_FALSE;
1708   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1709   b->j    = uj;
1710   b->i    = ui;
1711   b->diag = 0;
1712   b->ilen = 0;
1713   b->imax = 0;
1714   b->row  = perm;
1715   b->col  = perm;
1716   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1717   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1718   b->icol = iperm;
1719   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1720   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1721   ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1722   b->maxnz   = b->nz = ui[am];
1723   b->free_a  = PETSC_TRUE;
1724   b->free_ij = PETSC_TRUE;
1725 
1726   (fact)->info.factor_mallocs    = reallocs;
1727   (fact)->info.fill_ratio_given  = fill;
1728   if (ai[am] != 0) {
1729     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1730   } else {
1731     (fact)->info.fill_ratio_needed = 0.0;
1732   }
1733   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1734   PetscFunctionReturn(0);
1735 }
1736 
1737 #undef __FUNCT__
1738 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1739 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1740 {
1741   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1742   Mat_SeqSBAIJ       *b;
1743   PetscErrorCode     ierr;
1744   PetscTruth         perm_identity;
1745   PetscReal          fill = info->fill;
1746   const PetscInt     *rip,*riip;
1747   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1748   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1749   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1750   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1751   PetscBT            lnkbt;
1752   IS                 iperm;
1753 
1754   PetscFunctionBegin;
1755   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1756   /* check whether perm is the identity mapping */
1757   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1758   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1759   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1760   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1761 
1762   /* initialization */
1763   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1764   ui[0] = 0;
1765 
1766   /* jl: linked list for storing indices of the pivot rows
1767      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1768   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1769   il     = jl + am;
1770   cols   = il + am;
1771   ui_ptr = (PetscInt**)(cols + am);
1772   for (i=0; i<am; i++){
1773     jl[i] = am; il[i] = 0;
1774   }
1775 
1776   /* create and initialize a linked list for storing column indices of the active row k */
1777   nlnk = am + 1;
1778   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1779 
1780   /* initial FreeSpace size is fill*(ai[am]+1) */
1781   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1782   current_space = free_space;
1783 
1784   for (k=0; k<am; k++){  /* for each active row k */
1785     /* initialize lnk by the column indices of row rip[k] of A */
1786     nzk   = 0;
1787     ncols = ai[rip[k]+1] - ai[rip[k]];
1788     if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1789     ncols_upper = 0;
1790     for (j=0; j<ncols; j++){
1791       i = riip[*(aj + ai[rip[k]] + j)];
1792       if (i >= k){ /* only take upper triangular entry */
1793         cols[ncols_upper] = i;
1794         ncols_upper++;
1795       }
1796     }
1797     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1798     nzk += nlnk;
1799 
1800     /* update lnk by computing fill-in for each pivot row to be merged in */
1801     prow = jl[k]; /* 1st pivot row */
1802 
1803     while (prow < k){
1804       nextprow = jl[prow];
1805       /* merge prow into k-th row */
1806       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1807       jmax = ui[prow+1];
1808       ncols = jmax-jmin;
1809       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1810       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1811       nzk += nlnk;
1812 
1813       /* update il and jl for prow */
1814       if (jmin < jmax){
1815         il[prow] = jmin;
1816         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1817       }
1818       prow = nextprow;
1819     }
1820 
1821     /* if free space is not available, make more free space */
1822     if (current_space->local_remaining<nzk) {
1823       i = am - k + 1; /* num of unfactored rows */
1824       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1825       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1826       reallocs++;
1827     }
1828 
1829     /* copy data into free space, then initialize lnk */
1830     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1831 
1832     /* add the k-th row into il and jl */
1833     if (nzk-1 > 0){
1834       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1835       jl[k] = jl[i]; jl[i] = k;
1836       il[k] = ui[k] + 1;
1837     }
1838     ui_ptr[k] = current_space->array;
1839     current_space->array           += nzk;
1840     current_space->local_used      += nzk;
1841     current_space->local_remaining -= nzk;
1842 
1843     ui[k+1] = ui[k] + nzk;
1844   }
1845 
1846 #if defined(PETSC_USE_INFO)
1847   if (ai[am] != 0) {
1848     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1849     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1850     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1851     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1852   } else {
1853      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1854   }
1855 #endif
1856 
1857   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1858   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1859   ierr = PetscFree(jl);CHKERRQ(ierr);
1860 
1861   /* destroy list of free space and other temporary array(s) */
1862   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1863   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1864   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1865 
1866   /* put together the new matrix in MATSEQSBAIJ format */
1867 
1868   b = (Mat_SeqSBAIJ*)(fact)->data;
1869   b->singlemalloc = PETSC_FALSE;
1870   b->free_a       = PETSC_TRUE;
1871   b->free_ij      = PETSC_TRUE;
1872   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1873   b->j    = uj;
1874   b->i    = ui;
1875   b->diag = 0;
1876   b->ilen = 0;
1877   b->imax = 0;
1878   b->row  = perm;
1879   b->col  = perm;
1880   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1881   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1882   b->icol = iperm;
1883   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1884   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1885   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1886   b->maxnz = b->nz = ui[am];
1887 
1888   (fact)->info.factor_mallocs    = reallocs;
1889   (fact)->info.fill_ratio_given  = fill;
1890   if (ai[am] != 0) {
1891     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1892   } else {
1893     (fact)->info.fill_ratio_needed = 0.0;
1894   }
1895   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1896   PetscFunctionReturn(0);
1897 }
1898